├── .circleci ├── config.yml └── deploy.sh ├── .coveragerc ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── ---new-benchmark.md │ ├── --new-model-addition.md │ ├── bug-report.md │ ├── feature-request.md │ ├── migration.md │ └── question-help.md ├── PULL_REQUEST_TEMPLATE.md ├── conda │ ├── build.sh │ └── meta.yaml └── workflows │ ├── github-torch-hub.yml │ ├── model-templates.yml │ ├── release-conda.yml │ ├── self-push.yml │ ├── self-scheduled.yml │ └── stale.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUES.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README_hf.md ├── analysis ├── paper_plot.py └── plot_weights.py ├── docker ├── transformers-cpu │ └── Dockerfile ├── transformers-gpu │ └── Dockerfile ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-pytorch-tpu │ ├── Dockerfile │ ├── bert-base-cased.jsonnet │ ├── dataset.yaml │ └── docker-entrypoint.sh ├── transformers-tensorflow-cpu │ └── Dockerfile └── transformers-tensorflow-gpu │ └── Dockerfile ├── docs ├── Makefile ├── README.md └── source │ ├── _static │ ├── css │ │ ├── Calibre-Light.ttf │ │ ├── Calibre-Medium.otf │ │ ├── Calibre-Regular.otf │ │ ├── Calibre-Thin.otf │ │ ├── code-snippets.css │ │ └── huggingface.css │ └── js │ │ ├── custom.js │ │ └── huggingface_logo.svg │ ├── add_new_model.rst │ ├── benchmarks.rst │ ├── bertology.rst │ ├── community.md │ ├── conf.py │ ├── contributing.md │ ├── converting_tensorflow_models.rst │ ├── custom_datasets.rst │ ├── debugging.rst │ ├── examples.md │ ├── fast_tokenizers.rst │ ├── favicon.ico │ ├── glossary.rst │ ├── imgs │ ├── course_banner.png │ ├── local_attention_mask.png │ ├── ppl_chunked.gif │ ├── ppl_full.gif │ ├── ppl_sliding.gif │ ├── transformers_logo_name.png │ ├── transformers_overview.png │ ├── warmup_constant_schedule.png │ ├── warmup_cosine_hard_restarts_schedule.png │ ├── warmup_cosine_schedule.png │ ├── warmup_cosine_warm_restarts_schedule.png │ └── warmup_linear_schedule.png │ ├── index.rst │ ├── installation.md │ ├── internal │ ├── file_utils.rst │ ├── generation_utils.rst │ ├── modeling_utils.rst │ ├── pipelines_utils.rst │ ├── tokenization_utils.rst │ └── trainer_utils.rst │ ├── main_classes │ ├── callback.rst │ ├── configuration.rst │ ├── data_collator.rst │ ├── deepspeed.rst │ ├── feature_extractor.rst │ ├── logging.rst │ ├── model.rst │ ├── optimizer_schedules.rst │ ├── output.rst │ ├── pipelines.rst │ ├── processors.rst │ ├── tokenizer.rst │ └── trainer.rst │ ├── migration.md │ ├── model_doc │ ├── albert.rst │ ├── auto.rst │ ├── bart.rst │ ├── barthez.rst │ ├── bert.rst │ ├── bert_japanese.rst │ ├── bertgeneration.rst │ ├── bertweet.rst │ ├── bigbird.rst │ ├── bigbird_pegasus.rst │ ├── blenderbot.rst │ ├── blenderbot_small.rst │ ├── bort.rst │ ├── byt5.rst │ ├── camembert.rst │ ├── clip.rst │ ├── convbert.rst │ ├── cpm.rst │ ├── ctrl.rst │ ├── deberta.rst │ ├── deberta_v2.rst │ ├── deit.rst │ ├── detr.rst │ ├── dialogpt.rst │ ├── distilbert.rst │ ├── dpr.rst │ ├── electra.rst │ ├── encoderdecoder.rst │ ├── flaubert.rst │ ├── fsmt.rst │ ├── funnel.rst │ ├── gpt.rst │ ├── gpt2.rst │ ├── gpt_neo.rst │ ├── herbert.rst │ ├── hubert.rst │ ├── ibert.rst │ ├── layoutlm.rst │ ├── led.rst │ ├── longformer.rst │ ├── luke.rst │ ├── lxmert.rst │ ├── m2m_100.rst │ ├── marian.rst │ ├── mbart.rst │ ├── megatron_bert.rst │ ├── megatron_gpt2.rst │ ├── mobilebert.rst │ ├── mpnet.rst │ ├── mt5.rst │ ├── pegasus.rst │ ├── phobert.rst │ ├── prophetnet.rst │ ├── rag.rst │ ├── reformer.rst 
│ ├── retribert.rst │ ├── roberta.rst │ ├── roformer.rst │ ├── speech_to_text.rst │ ├── squeezebert.rst │ ├── t5.rst │ ├── tapas.rst │ ├── transformerxl.rst │ ├── visual_bert.rst │ ├── vit.rst │ ├── wav2vec2.rst │ ├── xlm.rst │ ├── xlmprophetnet.rst │ ├── xlmroberta.rst │ ├── xlnet.rst │ └── xlsr_wav2vec2.rst │ ├── model_sharing.rst │ ├── model_summary.rst │ ├── multilingual.rst │ ├── notebooks.md │ ├── performance.md │ ├── perplexity.rst │ ├── philosophy.rst │ ├── preprocessing.rst │ ├── pretrained_models.rst │ ├── quicktour.rst │ ├── sagemaker.md │ ├── serialization.rst │ ├── task_summary.rst │ ├── testing.rst │ ├── tokenizer_summary.rst │ ├── training.rst │ └── troubleshooting.md ├── examples ├── README.md ├── flax │ ├── README.md │ ├── language-modeling │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_clm_flax.py │ │ └── run_mlm_flax.py │ ├── summarization │ │ └── run_summarization_flax.py │ └── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_flax_glue.py ├── legacy │ ├── README.md │ ├── multiple_choice │ │ ├── run_multiple_choice.py │ │ └── utils_multiple_choice.py │ ├── pytorch-lightning │ │ ├── lightning_base.py │ │ ├── requirements.txt │ │ ├── run_glue.py │ │ ├── run_glue.sh │ │ ├── run_ner.py │ │ ├── run_ner.sh │ │ └── run_pos.sh │ ├── question-answering │ │ ├── run_squad.py │ │ └── run_squad_trainer.py │ ├── run_camembert.py │ ├── run_chinese_ref.py │ ├── run_language_modeling.py │ ├── run_openai_gpt.py │ ├── run_swag.py │ ├── run_transfo_xl.py │ ├── seq2seq │ │ ├── README.md │ │ ├── __init__.py │ │ ├── convert_model_to_fp16.py │ │ ├── download_wmt.py │ │ ├── finetune.sh │ │ ├── finetune_tpu.sh │ │ ├── finetune_trainer.py │ │ ├── minify_dataset.py │ │ ├── old_test_calculate_rouge.py │ │ ├── old_test_datasets.py │ │ ├── old_test_fsmt_bleu_score.py │ │ ├── old_test_seq2seq_examples.py │ │ ├── old_test_seq2seq_examples_multi_gpu.py │ │ ├── old_test_tatoeba_conversion.py │ │ ├── pack_dataset.py │ │ ├── requirements.txt │ │ ├── romanian_postprocessing.md │ │ ├── rouge_cli.py │ │ ├── run_distributed_eval.py │ │ ├── run_eval.py │ │ ├── run_eval_search.py │ │ ├── save_len_file.py │ │ ├── save_randomly_initialized_model.py │ │ ├── sentence_splitter.py │ │ ├── seq2seq_trainer.py │ │ ├── seq2seq_training_args.py │ │ ├── test_data │ │ │ ├── fsmt │ │ │ │ ├── build-eval-data.py │ │ │ │ └── fsmt_val_data.json │ │ │ ├── test_data │ │ │ └── wmt_en_ro │ │ │ │ ├── test.source │ │ │ │ ├── test.target │ │ │ │ ├── train.len │ │ │ │ ├── train.source │ │ │ │ ├── train.target │ │ │ │ ├── val.len │ │ │ │ ├── val.source │ │ │ │ └── val.target │ │ ├── train_distil_marian_enro.sh │ │ ├── train_distil_marian_enro_tpu.sh │ │ ├── train_distilbart_cnn.sh │ │ ├── train_mbart_cc25_enro.sh │ │ ├── utils.py │ │ └── xla_spawn.py │ ├── text-classification │ │ └── run_tf_text_classification.py │ └── token-classification │ │ ├── README.md │ │ ├── run.sh │ │ ├── run_chunk.sh │ │ ├── run_ner.py │ │ ├── run_pos.sh │ │ ├── run_tf_ner.py │ │ ├── scripts │ │ └── preprocess.py │ │ ├── tasks.py │ │ └── utils_ner.py ├── pytorch │ ├── README.md │ ├── _tests_requirements.txt │ ├── benchmarking │ │ ├── README.md │ │ ├── plot_csv_file.py │ │ ├── requirements.txt │ │ └── run_benchmark.py │ ├── conftest.py │ ├── language-modeling │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_clm.py │ │ ├── run_clm_no_trainer.py │ │ ├── run_mlm.py │ │ ├── run_mlm_no_trainer.py │ │ └── run_plm.py │ ├── multiple-choice │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_no_trainer.sh │ │ ├── run_swag.py │ │ 
└── run_swag_no_trainer.py │ ├── question-answering │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_qa.py │ │ ├── run_qa_beam_search.py │ │ ├── run_qa_beam_search_no_trainer.py │ │ ├── run_qa_no_trainer.py │ │ ├── trainer_qa.py │ │ └── utils_qa.py │ ├── summarization │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── run_summarization_dataset.py │ │ ├── run_summarization_no_trainer.py │ │ └── run_summarization_test.py │ ├── test_examples.py │ ├── test_xla_examples.py │ ├── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_glue.py │ │ ├── run_glue_no_trainer.py │ │ └── run_xnli.py │ ├── text-generation │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_generation.py │ ├── token-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── run_ner.py │ │ ├── run_ner_no_trainer.py │ │ └── run_no_trainer.sh │ ├── translation │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_translation.py │ │ └── run_translation_no_trainer.py │ └── xla_spawn.py ├── research_projects │ ├── README.md │ ├── adversarial │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_hans.py │ │ └── utils_hans.py │ ├── bert-loses-patience │ │ ├── README.md │ │ ├── pabee │ │ │ ├── __init__.py │ │ │ ├── modeling_pabee_albert.py │ │ │ └── modeling_pabee_bert.py │ │ ├── requirements.txt │ │ ├── run_glue_with_pabee.py │ │ └── test_run_glue_with_pabee.py │ ├── bertabs │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configuration_bertabs.py │ │ ├── convert_bertabs_original_pytorch_checkpoint.py │ │ ├── modeling_bertabs.py │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── test_utils_summarization.py │ │ └── utils_summarization.py │ ├── bertology │ │ ├── requirements.txt │ │ ├── run_bertology.py │ │ └── run_prune_gpt.py │ ├── deebert │ │ ├── README.md │ │ ├── entropy_eval.sh │ │ ├── eval_deebert.sh │ │ ├── requirements.txt │ │ ├── run_glue_deebert.py │ │ ├── src │ │ │ ├── __init__.py │ │ │ ├── modeling_highway_bert.py │ │ │ └── modeling_highway_roberta.py │ │ ├── test_glue_deebert.py │ │ └── train_deebert.sh │ ├── distillation │ │ ├── README.md │ │ ├── distiller.py │ │ ├── grouped_batch_sampler.py │ │ ├── lm_seqs_dataset.py │ │ ├── requirements.txt │ │ ├── run_squad_w_distillation.py │ │ ├── scripts │ │ │ ├── binarized_data.py │ │ │ ├── extract.py │ │ │ ├── extract_distilbert.py │ │ │ └── token_counts.py │ │ ├── train.py │ │ ├── training_configs │ │ │ ├── distilbert-base-cased.json │ │ │ ├── distilbert-base-multilingual-cased.json │ │ │ ├── distilbert-base-uncased.json │ │ │ ├── distilgpt2.json │ │ │ └── distilroberta-base.json │ │ └── utils.py │ ├── jax-projects │ │ ├── HOW_TO_PROPOSE_PROJECT.md │ │ └── README.md │ ├── longform-qa │ │ ├── README.md │ │ ├── eli5_app.py │ │ ├── eli5_utils.py │ │ └── requirements.txt │ ├── lxmert │ │ ├── README.md │ │ ├── demo.ipynb │ │ ├── extracting_data.py │ │ ├── modeling_frcnn.py │ │ ├── processing_image.py │ │ ├── requirements.txt │ │ ├── utils.py │ │ └── visualizing_image.py │ ├── mlm_wwm │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_chinese_ref.py │ │ └── run_mlm_wwm.py │ ├── mm-imdb │ │ ├── README.md │ │ ├── run_mmimdb.py │ │ └── utils_mmimdb.py │ ├── movement-pruning │ │ ├── README.md │ │ ├── Saving_PruneBERT.ipynb │ │ ├── bertarize.py │ │ ├── counts_parameters.py │ │ ├── emmental │ │ │ ├── __init__.py │ │ │ ├── configuration_bert_masked.py │ │ │ ├── modeling_bert_masked.py │ │ │ └── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── binarizer.py │ │ │ │ └── masked_nn.py │ │ ├── 
masked_run_glue.py │ │ ├── masked_run_squad.py │ │ └── requirements.txt │ ├── performer │ │ ├── README.md │ │ ├── full_script.sh │ │ ├── modeling_flax_performer.py │ │ ├── modeling_flax_performer_utils.py │ │ ├── run_mlm_performer.py │ │ └── sanity_script.sh │ ├── pplm │ │ ├── README.md │ │ ├── imgs │ │ │ ├── headfigure.png │ │ │ └── wooly.png │ │ ├── pplm_classification_head.py │ │ ├── requirements.txt │ │ ├── run_pplm.py │ │ └── run_pplm_discrim_train.py │ ├── rag-end2end-retriever │ │ ├── README.md │ │ ├── callbacks_rag.py │ │ ├── distributed_ray_retriever.py │ │ ├── eval_rag.py │ │ ├── finetune_rag.py │ │ ├── finetune_rag_ray_end2end.sh │ │ ├── kb_encode_utils.py │ │ ├── lightning_base.py │ │ ├── requirements.txt │ │ ├── test_run │ │ │ ├── dummy-kb │ │ │ │ └── my_knowledge_dataset.csv │ │ │ ├── dummy-train-data │ │ │ │ ├── train.source │ │ │ │ ├── train.target │ │ │ │ ├── val.source │ │ │ │ └── val.target │ │ │ ├── test_finetune.sh │ │ │ └── test_rag_new_features.sh │ │ ├── use_own_knowledge_dataset.py │ │ └── utils_rag.py │ ├── rag │ │ ├── README.md │ │ ├── __init__.py │ │ ├── _test_finetune_rag.py │ │ ├── callbacks_rag.py │ │ ├── consolidate_rag_checkpoint.py │ │ ├── distributed_pytorch_retriever.py │ │ ├── distributed_ray_retriever.py │ │ ├── eval_rag.py │ │ ├── finetune_rag.py │ │ ├── finetune_rag.sh │ │ ├── finetune_rag_ray.sh │ │ ├── lightning_base.py │ │ ├── parse_dpr_relevance_data.py │ │ ├── requirements.txt │ │ ├── test_data │ │ │ └── my_knowledge_dataset.csv │ │ ├── test_distributed_retriever.py │ │ ├── use_own_knowledge_dataset.py │ │ └── utils_rag.py │ ├── seq2seq-distillation │ │ ├── README.md │ │ ├── _test_bash_script.py │ │ ├── _test_make_student.py │ │ ├── _test_seq2seq_examples.py │ │ ├── _test_seq2seq_examples_multi_gpu.py │ │ ├── callbacks.py │ │ ├── convert_pl_checkpoint_to_hf.py │ │ ├── distil_marian_enro_teacher.sh │ │ ├── distil_marian_no_teacher.sh │ │ ├── distillation.py │ │ ├── dynamic_bs_example.sh │ │ ├── finetune.py │ │ ├── finetune.sh │ │ ├── finetune_bart_tiny.sh │ │ ├── finetune_pegasus_xsum.sh │ │ ├── finetune_t5.sh │ │ ├── lightning_base.py │ │ ├── make_student.py │ │ ├── precomputed_pseudo_labels.md │ │ ├── requirements.txt │ │ ├── run_eval.py │ │ ├── sentence_splitter.py │ │ ├── train_distilbart_cnn.sh │ │ ├── train_distilbart_xsum.sh │ │ ├── train_mbart_cc25_enro.sh │ │ └── utils.py │ ├── wav2vec2 │ │ ├── FINE_TUNE_XLSR_WAV2VEC2.md │ │ ├── README.md │ │ ├── ds_config_wav2vec2_zero2.json │ │ ├── ds_config_wav2vec2_zero3.json │ │ ├── finetune_base_100.sh │ │ ├── finetune_base_timit_asr.sh │ │ ├── finetune_large_lv60_100.sh │ │ ├── finetune_large_lv60_timit_asr.sh │ │ ├── finetune_large_xlsr_53_arabic_speech_corpus.sh │ │ ├── finetune_wav2vec2_xlsr_turkish.sh │ │ ├── requirements.txt │ │ ├── run_asr.py │ │ ├── run_common_voice.py │ │ ├── run_pretrain.py │ │ ├── test_wav2vec2_deepspeed.py │ │ └── vocab │ │ │ └── buckwalter.json │ └── zero-shot-distillation │ │ ├── README.md │ │ └── distill_classifier.py └── tensorflow │ ├── README.md │ ├── benchmarking │ ├── README.md │ ├── plot_csv_file.py │ ├── requirements.txt │ └── run_benchmark_tf.py │ ├── multiple-choice │ ├── README.md │ ├── requirements.txt │ ├── run_tf_multiple_choice.py │ └── utils_multiple_choice.py │ ├── question-answering │ ├── README.md │ ├── requirements.txt │ ├── run_qa.py │ └── utils_qa.py │ └── text-classification │ ├── README.md │ ├── requirements.txt │ ├── run_glue.py │ └── run_text_classification.py ├── exps ├── romanian_postprocess.sh ├── run_en_ro.sh ├── run_glue.sh └── 
run_xsum.sh ├── hubconf.py ├── img └── intro.png ├── model_cards └── README.md ├── notebooks ├── 01-training-tokenizers.ipynb ├── 02-transformers.ipynb ├── 03-pipelines.ipynb ├── 04-onnx-export.ipynb ├── 05-benchmark.ipynb └── README.md ├── petl ├── __init__.py ├── custom_callback.py ├── dynamic_batching.py ├── options.py ├── petl_enc_model.py ├── petl_encdec_model.py ├── petl_factory.py └── utils.py ├── pyproject.toml ├── scripts ├── check_tokenizers.py ├── fsmt │ ├── convert-allenai-wmt16.sh │ ├── convert-allenai-wmt19.sh │ ├── convert-facebook-wmt19.sh │ ├── eval-allenai-wmt16.sh │ ├── eval-allenai-wmt19.sh │ ├── eval-facebook-wmt19.sh │ ├── fsmt-make-super-tiny-model.py │ ├── fsmt-make-tiny-model.py │ ├── gen-card-allenai-wmt16.py │ ├── gen-card-allenai-wmt19.py │ ├── gen-card-facebook-wmt19.py │ ├── s3-move.sh │ └── tests-to-run.sh ├── pegasus │ └── build_test_sample_spm_no_bos.py ├── stale.py └── tatoeba │ ├── README.md │ └── upload_models.sh ├── setup.cfg ├── setup.py ├── src └── transformers │ ├── __init__.py │ ├── activations.py │ ├── activations_tf.py │ ├── benchmark │ ├── __init__.py │ ├── benchmark.py │ ├── benchmark_args.py │ ├── benchmark_args_tf.py │ ├── benchmark_args_utils.py │ ├── benchmark_tf.py │ └── benchmark_utils.py │ ├── commands │ ├── __init__.py │ ├── add_new_model.py │ ├── convert.py │ ├── download.py │ ├── env.py │ ├── lfs.py │ ├── run.py │ ├── serving.py │ ├── train.py │ ├── transformers_cli.py │ └── user.py │ ├── configuration_utils.py │ ├── convert_graph_to_onnx.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py │ ├── data │ ├── __init__.py │ ├── data_collator.py │ ├── datasets │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── language_modeling.py │ │ └── squad.py │ ├── metrics │ │ ├── __init__.py │ │ └── squad_metrics.py │ ├── processors │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ └── test_generation_utils.py │ ├── debug_utils.py │ ├── deepspeed.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── feature_extraction_sequence_utils.py │ ├── feature_extraction_utils.py │ ├── file_utils.py │ ├── generation_beam_search.py │ ├── generation_flax_logits_process.py │ ├── generation_flax_utils.py │ ├── generation_logits_process.py │ ├── generation_stopping_criteria.py │ ├── generation_tf_utils.py │ ├── generation_utils.py │ ├── hf_api.py │ ├── hf_argparser.py │ ├── image_utils.py │ ├── integrations.py │ ├── modelcard.py │ ├── modeling_flax_outputs.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_outputs.py │ ├── modeling_tf_outputs.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── albert │ │ ├── __init__.py │ │ ├── configuration_albert.py │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_albert.py │ │ ├── modeling_tf_albert.py │ │ ├── tokenization_albert.py │ │ └── tokenization_albert_fast.py │ ├── auto │ │ ├── __init__.py │ │ ├── auto_factory.py │ │ ├── configuration_auto.py │ │ ├── feature_extraction_auto.py │ │ ├── modeling_auto.py │ │ ├── modeling_flax_auto.py │ │ ├── modeling_tf_auto.py │ │ └── tokenization_auto.py │ ├── bart │ │ ├── __init__.py │ │ ├── configuration_bart.py │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_bart.py │ │ ├── modeling_flax_bart.py │ │ ├── modeling_tf_bart.py │ │ ├── 
tokenization_bart.py │ │ └── tokenization_bart_fast.py │ ├── barthez │ │ ├── __init__.py │ │ ├── tokenization_barthez.py │ │ └── tokenization_barthez_fast.py │ ├── bert │ │ ├── __init__.py │ │ ├── configuration_bert.py │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ │ ├── modeling_bert.py │ │ ├── modeling_flax_bert.py │ │ ├── modeling_tf_bert.py │ │ ├── tokenization_bert.py │ │ └── tokenization_bert_fast.py │ ├── bert_generation │ │ ├── __init__.py │ │ ├── configuration_bert_generation.py │ │ ├── modeling_bert_generation.py │ │ └── tokenization_bert_generation.py │ ├── bert_japanese │ │ ├── __init__.py │ │ └── tokenization_bert_japanese.py │ ├── bertweet │ │ ├── __init__.py │ │ └── tokenization_bertweet.py │ ├── big_bird │ │ ├── __init__.py │ │ ├── configuration_big_bird.py │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_big_bird.py │ │ ├── modeling_flax_big_bird.py │ │ ├── tokenization_big_bird.py │ │ └── tokenization_big_bird_fast.py │ ├── bigbird_pegasus │ │ ├── __init__.py │ │ ├── configuration_bigbird_pegasus.py │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py │ │ └── modeling_bigbird_pegasus.py │ ├── blenderbot │ │ ├── __init__.py │ │ ├── configuration_blenderbot.py │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_blenderbot.py │ │ ├── modeling_tf_blenderbot.py │ │ └── tokenization_blenderbot.py │ ├── blenderbot_small │ │ ├── __init__.py │ │ ├── configuration_blenderbot_small.py │ │ ├── modeling_blenderbot_small.py │ │ ├── modeling_tf_blenderbot_small.py │ │ ├── tokenization_blenderbot_small.py │ │ └── tokenization_blenderbot_small_fast.py │ ├── bort │ │ └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py │ ├── byt5 │ │ ├── __init__.py │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py │ │ └── tokenization_byt5.py │ ├── camembert │ │ ├── __init__.py │ │ ├── configuration_camembert.py │ │ ├── modeling_camembert.py │ │ ├── modeling_tf_camembert.py │ │ ├── tokenization_camembert.py │ │ └── tokenization_camembert_fast.py │ ├── clip │ │ ├── __init__.py │ │ ├── configuration_clip.py │ │ ├── convert_clip_original_pytorch_to_hf.py │ │ ├── feature_extraction_clip.py │ │ ├── modeling_clip.py │ │ ├── modeling_flax_clip.py │ │ ├── processing_clip.py │ │ ├── tokenization_clip.py │ │ └── tokenization_clip_fast.py │ ├── convbert │ │ ├── __init__.py │ │ ├── configuration_convbert.py │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py │ │ ├── modeling_convbert.py │ │ ├── modeling_tf_convbert.py │ │ ├── tokenization_convbert.py │ │ └── tokenization_convbert_fast.py │ ├── cpm │ │ ├── __init__.py │ │ └── tokenization_cpm.py │ ├── ctrl │ │ ├── __init__.py │ │ ├── configuration_ctrl.py │ │ ├── modeling_ctrl.py │ │ ├── modeling_tf_ctrl.py │ │ └── tokenization_ctrl.py │ ├── deberta │ │ ├── __init__.py │ │ ├── configuration_deberta.py │ │ ├── modeling_deberta.py │ │ ├── tokenization_deberta.py │ │ └── tokenization_deberta_fast.py │ ├── deberta_v2 │ │ ├── __init__.py │ │ ├── configuration_deberta_v2.py │ │ ├── modeling_deberta_v2.py │ │ └── tokenization_deberta_v2.py │ ├── deit │ │ ├── __init__.py │ │ ├── configuration_deit.py │ │ ├── convert_deit_timm_to_pytorch.py │ │ ├── feature_extraction_deit.py │ │ └── modeling_deit.py │ ├── detr │ │ ├── __init__.py │ │ ├── configuration_detr.py │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py │ │ ├── feature_extraction_detr.py │ 
│ └── modeling_detr.py │ ├── dialogpt │ │ ├── __init__.py │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── distilbert │ │ ├── __init__.py │ │ ├── configuration_distilbert.py │ │ ├── modeling_distilbert.py │ │ ├── modeling_tf_distilbert.py │ │ ├── tokenization_distilbert.py │ │ └── tokenization_distilbert_fast.py │ ├── dpr │ │ ├── __init__.py │ │ ├── configuration_dpr.py │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ ├── modeling_dpr.py │ │ ├── modeling_tf_dpr.py │ │ ├── tokenization_dpr.py │ │ └── tokenization_dpr_fast.py │ ├── electra │ │ ├── __init__.py │ │ ├── configuration_electra.py │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_electra.py │ │ ├── modeling_flax_electra.py │ │ ├── modeling_tf_electra.py │ │ ├── tokenization_electra.py │ │ └── tokenization_electra_fast.py │ ├── encoder_decoder │ │ ├── __init__.py │ │ ├── configuration_encoder_decoder.py │ │ └── modeling_encoder_decoder.py │ ├── flaubert │ │ ├── __init__.py │ │ ├── configuration_flaubert.py │ │ ├── modeling_flaubert.py │ │ ├── modeling_tf_flaubert.py │ │ └── tokenization_flaubert.py │ ├── fsmt │ │ ├── __init__.py │ │ ├── configuration_fsmt.py │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_fsmt.py │ │ └── tokenization_fsmt.py │ ├── funnel │ │ ├── __init__.py │ │ ├── configuration_funnel.py │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_funnel.py │ │ ├── modeling_tf_funnel.py │ │ ├── tokenization_funnel.py │ │ └── tokenization_funnel_fast.py │ ├── gpt2 │ │ ├── __init__.py │ │ ├── configuration_gpt2.py │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_flax_gpt2.py │ │ ├── modeling_gpt2.py │ │ ├── modeling_tf_gpt2.py │ │ ├── tokenization_gpt2.py │ │ └── tokenization_gpt2_fast.py │ ├── gpt_neo │ │ ├── __init__.py │ │ ├── configuration_gpt_neo.py │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py │ │ └── modeling_gpt_neo.py │ ├── herbert │ │ ├── __init__.py │ │ ├── tokenization_herbert.py │ │ └── tokenization_herbert_fast.py │ ├── hubert │ │ ├── __init__.py │ │ ├── configuration_hubert.py │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py │ │ └── modeling_hubert.py │ ├── ibert │ │ ├── __init__.py │ │ ├── configuration_ibert.py │ │ ├── modeling_ibert.py │ │ └── quant_modules.py │ ├── layoutlm │ │ ├── __init__.py │ │ ├── configuration_layoutlm.py │ │ ├── modeling_layoutlm.py │ │ ├── modeling_tf_layoutlm.py │ │ ├── tokenization_layoutlm.py │ │ └── tokenization_layoutlm_fast.py │ ├── led │ │ ├── __init__.py │ │ ├── configuration_led.py │ │ ├── modeling_led.py │ │ ├── modeling_tf_led.py │ │ ├── tokenization_led.py │ │ └── tokenization_led_fast.py │ ├── longformer │ │ ├── __init__.py │ │ ├── configuration_longformer.py │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ ├── modeling_longformer.py │ │ ├── modeling_tf_longformer.py │ │ ├── tokenization_longformer.py │ │ └── tokenization_longformer_fast.py │ ├── luke │ │ ├── __init__.py │ │ ├── configuration_luke.py │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_luke.py │ │ └── tokenization_luke.py │ ├── lxmert │ │ ├── __init__.py │ │ ├── configuration_lxmert.py │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_lxmert.py │ │ ├── modeling_tf_lxmert.py │ │ ├── tokenization_lxmert.py │ │ └── tokenization_lxmert_fast.py │ ├── m2m_100 │ │ ├── __init__.py │ │ ├── configuration_m2m_100.py │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py │ │ ├── 
modeling_m2m_100.py │ │ └── tokenization_m2m_100.py │ ├── marian │ │ ├── __init__.py │ │ ├── configuration_marian.py │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ ├── convert_marian_to_pytorch.py │ │ ├── modeling_marian.py │ │ ├── modeling_tf_marian.py │ │ └── tokenization_marian.py │ ├── mbart │ │ ├── __init__.py │ │ ├── configuration_mbart.py │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py │ │ ├── modeling_mbart.py │ │ ├── modeling_tf_mbart.py │ │ ├── tokenization_mbart.py │ │ ├── tokenization_mbart50.py │ │ ├── tokenization_mbart50_fast.py │ │ └── tokenization_mbart_fast.py │ ├── megatron_bert │ │ ├── __init__.py │ │ ├── configuration_megatron_bert.py │ │ ├── convert_megatron_bert_checkpoint.py │ │ └── modeling_megatron_bert.py │ ├── megatron_gpt2 │ │ └── convert_megatron_gpt2_checkpoint.py │ ├── mmbt │ │ ├── __init__.py │ │ ├── configuration_mmbt.py │ │ └── modeling_mmbt.py │ ├── mobilebert │ │ ├── __init__.py │ │ ├── configuration_mobilebert.py │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_mobilebert.py │ │ ├── modeling_tf_mobilebert.py │ │ ├── tokenization_mobilebert.py │ │ └── tokenization_mobilebert_fast.py │ ├── mpnet │ │ ├── __init__.py │ │ ├── configuration_mpnet.py │ │ ├── modeling_mpnet.py │ │ ├── modeling_tf_mpnet.py │ │ ├── tokenization_mpnet.py │ │ └── tokenization_mpnet_fast.py │ ├── mt5 │ │ ├── __init__.py │ │ ├── configuration_mt5.py │ │ ├── modeling_mt5.py │ │ └── modeling_tf_mt5.py │ ├── openai │ │ ├── __init__.py │ │ ├── configuration_openai.py │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_openai.py │ │ ├── modeling_tf_openai.py │ │ ├── tokenization_openai.py │ │ └── tokenization_openai_fast.py │ ├── pegasus │ │ ├── __init__.py │ │ ├── configuration_pegasus.py │ │ ├── convert_pegasus_tf_to_pytorch.py │ │ ├── modeling_pegasus.py │ │ ├── modeling_tf_pegasus.py │ │ ├── tokenization_pegasus.py │ │ └── tokenization_pegasus_fast.py │ ├── phobert │ │ ├── __init__.py │ │ └── tokenization_phobert.py │ ├── prophetnet │ │ ├── __init__.py │ │ ├── configuration_prophetnet.py │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_prophetnet.py │ │ └── tokenization_prophetnet.py │ ├── rag │ │ ├── __init__.py │ │ ├── configuration_rag.py │ │ ├── modeling_rag.py │ │ ├── modeling_tf_rag.py │ │ ├── retrieval_rag.py │ │ └── tokenization_rag.py │ ├── reformer │ │ ├── __init__.py │ │ ├── configuration_reformer.py │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ ├── modeling_reformer.py │ │ ├── tokenization_reformer.py │ │ └── tokenization_reformer_fast.py │ ├── retribert │ │ ├── __init__.py │ │ ├── configuration_retribert.py │ │ ├── modeling_retribert.py │ │ ├── tokenization_retribert.py │ │ └── tokenization_retribert_fast.py │ ├── roberta │ │ ├── __init__.py │ │ ├── configuration_roberta.py │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_flax_roberta.py │ │ ├── modeling_roberta.py │ │ ├── modeling_tf_roberta.py │ │ ├── tokenization_roberta.py │ │ └── tokenization_roberta_fast.py │ ├── roformer │ │ ├── __init__.py │ │ ├── configuration_roformer.py │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_roformer.py │ │ ├── modeling_tf_roformer.py │ │ ├── tokenization_roformer.py │ │ ├── tokenization_roformer_fast.py │ │ └── tokenization_utils.py │ ├── speech_to_text │ │ ├── __init__.py │ │ ├── configuration_speech_to_text.py │ │ ├── convert_s2t_fairseq_to_tfms.py │ │ ├── feature_extraction_speech_to_text.py │ │ ├── 
modeling_speech_to_text.py │ │ ├── processing_speech_to_text.py │ │ └── tokenization_speech_to_text.py │ ├── squeezebert │ │ ├── __init__.py │ │ ├── configuration_squeezebert.py │ │ ├── modeling_squeezebert.py │ │ ├── tokenization_squeezebert.py │ │ └── tokenization_squeezebert_fast.py │ ├── t5 │ │ ├── __init__.py │ │ ├── configuration_t5.py │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_flax_t5.py │ │ ├── modeling_t5.py │ │ ├── modeling_tf_t5.py │ │ ├── tokenization_t5.py │ │ └── tokenization_t5_fast.py │ ├── tapas │ │ ├── __init__.py │ │ ├── configuration_tapas.py │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tapas.py │ │ └── tokenization_tapas.py │ ├── transfo_xl │ │ ├── __init__.py │ │ ├── configuration_transfo_xl.py │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_transfo_xl.py │ │ ├── modeling_tf_transfo_xl_utilities.py │ │ ├── modeling_transfo_xl.py │ │ ├── modeling_transfo_xl_utilities.py │ │ └── tokenization_transfo_xl.py │ ├── visual_bert │ │ ├── __init__.py │ │ ├── configuration_visual_bert.py │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py │ │ └── modeling_visual_bert.py │ ├── vit │ │ ├── __init__.py │ │ ├── configuration_vit.py │ │ ├── convert_vit_timm_to_pytorch.py │ │ ├── feature_extraction_vit.py │ │ ├── modeling_flax_vit.py │ │ └── modeling_vit.py │ ├── wav2vec2 │ │ ├── __init__.py │ │ ├── configuration_wav2vec2.py │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py │ │ ├── feature_extraction_wav2vec2.py │ │ ├── modeling_tf_wav2vec2.py │ │ ├── modeling_wav2vec2.py │ │ ├── processing_wav2vec2.py │ │ └── tokenization_wav2vec2.py │ ├── xlm │ │ ├── __init__.py │ │ ├── configuration_xlm.py │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlm.py │ │ ├── modeling_xlm.py │ │ └── tokenization_xlm.py │ ├── xlm_prophetnet │ │ ├── __init__.py │ │ ├── configuration_xlm_prophetnet.py │ │ ├── modeling_xlm_prophetnet.py │ │ └── tokenization_xlm_prophetnet.py │ ├── xlm_roberta │ │ ├── __init__.py │ │ ├── configuration_xlm_roberta.py │ │ ├── modeling_tf_xlm_roberta.py │ │ ├── modeling_xlm_roberta.py │ │ ├── tokenization_xlm_roberta.py │ │ └── tokenization_xlm_roberta_fast.py │ └── xlnet │ │ ├── __init__.py │ │ ├── configuration_xlnet.py │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlnet.py │ │ ├── modeling_xlnet.py │ │ ├── tokenization_xlnet.py │ │ └── tokenization_xlnet_fast.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines │ ├── __init__.py │ ├── automatic_speech_recognition.py │ ├── base.py │ ├── conversational.py │ ├── feature_extraction.py │ ├── fill_mask.py │ ├── image_classification.py │ ├── question_answering.py │ ├── table_question_answering.py │ ├── text2text_generation.py │ ├── text_classification.py │ ├── text_generation.py │ ├── token_classification.py │ └── zero_shot_classification.py │ ├── sagemaker │ ├── __init__.py │ ├── trainer_sm.py │ └── training_args_sm.py │ ├── testing_utils.py │ ├── tokenization_utils.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_fast.py │ ├── trainer.py │ ├── trainer_callback.py │ ├── trainer_pt_utils.py │ ├── trainer_seq2seq.py │ ├── trainer_tf.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_seq2seq.py │ ├── training_args_tf.py │ └── utils │ ├── __init__.py │ ├── coco_classes.py │ ├── dummy_flax_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_sentencepiece_and_speech_objects.py │ ├── 
dummy_sentencepiece_and_tokenizers_objects.py │ ├── dummy_sentencepiece_objects.py │ ├── dummy_speech_objects.py │ ├── dummy_tf_objects.py │ ├── dummy_timm_and_vision_objects.py │ ├── dummy_timm_objects.py │ ├── dummy_tokenizers_objects.py │ ├── dummy_vision_objects.py │ ├── fx.py │ ├── hp_naming.py │ ├── imagenet_classes.py │ ├── logging.py │ ├── model_parallel_utils.py │ ├── modeling_auto_mapping.py │ ├── notebook.py │ ├── sentencepiece_model_pb2.py │ └── versions.py ├── templates ├── adding_a_new_example_script │ ├── README.md │ ├── cookiecutter.json │ └── {{cookiecutter.directory_name}} │ │ └── run_{{cookiecutter.example_shortcut}}.py └── adding_a_new_model │ ├── ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md │ ├── README.md │ ├── cookiecutter-template-{{cookiecutter.modelname}} │ ├── __init__.py │ ├── configuration.json │ ├── configuration_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── to_replace_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_fast_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_{{cookiecutter.lowercase_modelname}}.py │ └── {{cookiecutter.lowercase_modelname}}.rst │ ├── cookiecutter.json │ ├── open_model_proposals │ ├── ADD_BIG_BIRD.md │ └── README.md │ └── tests │ ├── encoder-bert-tokenizer.json │ ├── pt-encoder-bert-tokenizer.json │ ├── pt-seq-2-seq-bart-tokenizer.json │ ├── standalone.json │ ├── tf-encoder-bert-tokenizer.json │ └── tf-seq-2-seq-bart-tokenizer.json ├── tests ├── __init__.py ├── conftest.py ├── deepspeed │ ├── ds_config_zero2.json │ ├── ds_config_zero3.json │ └── test_deepspeed.py ├── extended │ └── test_trainer_ext.py ├── fixtures │ ├── dummy-config.json │ ├── dummy_feature_extractor_config.json │ ├── empty.txt │ ├── input.txt │ ├── preprocessor_config.json │ ├── sample_text.txt │ ├── sample_text_no_unicode.txt │ ├── spiece.model │ ├── test_sentencepiece.model │ ├── test_sentencepiece_bpe.model │ ├── test_sentencepiece_no_bos.model │ └── tests_samples │ │ ├── .gitignore │ │ ├── COCO │ │ ├── 000000039769.png │ │ ├── coco_annotations.txt │ │ ├── coco_panoptic │ │ │ └── 000000039769.png │ │ └── coco_panoptic_annotations.txt │ │ ├── GermEval │ │ ├── dev.txt │ │ ├── labels.txt │ │ └── train.txt │ │ ├── MRPC │ │ ├── dev.csv │ │ ├── dev.tsv │ │ ├── train.csv │ │ └── train.tsv │ │ ├── SQUAD │ │ └── sample.json │ │ ├── STS-B │ │ ├── dev.tsv │ │ └── train.tsv │ │ ├── conll │ │ └── sample.json │ │ ├── swag │ │ └── sample.json │ │ ├── wiki_text │ │ └── wiki_00 │ │ ├── wmt16 │ │ └── sample.json │ │ ├── wmt_en_ro │ │ ├── test.json │ │ ├── train.json │ │ └── val.json │ │ └── xsum │ │ └── sample.json ├── sagemaker │ ├── README.md │ ├── __init__.py │ ├── conftest.py │ ├── scripts │ │ ├── pytorch │ │ │ ├── requirements.txt │ │ │ ├── run_ddp.py │ │ │ └── run_glue_model_parallelism.py │ │ └── tensorflow │ │ │ ├── requirements.txt │ │ │ ├── run_tf.py │ │ │ └── run_tf_dist.py │ ├── test_multi_node_data_parallel.py │ ├── test_multi_node_model_parallel.py │ └── test_single_node_gpu.py ├── test_activations.py ├── test_activations_tf.py ├── test_benchmark.py ├── test_benchmark_tf.py ├── test_cli.py ├── test_configuration_auto.py ├── test_configuration_common.py ├── test_data_collator.py ├── test_doc_samples.py ├── test_feature_extraction_auto.py ├── test_feature_extraction_clip.py ├── test_feature_extraction_common.py ├── 
test_feature_extraction_deit.py ├── test_feature_extraction_detr.py ├── test_feature_extraction_speech_to_text.py ├── test_feature_extraction_vit.py ├── test_feature_extraction_wav2vec2.py ├── test_file_utils.py ├── test_flax_auto.py ├── test_generation_beam_search.py ├── test_generation_flax_logits_process.py ├── test_generation_flax_utils.py ├── test_generation_logits_process.py ├── test_generation_stopping_criteria.py ├── test_generation_utils.py ├── test_hf_api.py ├── test_hf_argparser.py ├── test_image_utils.py ├── test_logging.py ├── test_model_card.py ├── test_model_output.py ├── test_modeling_albert.py ├── test_modeling_auto.py ├── test_modeling_bart.py ├── test_modeling_bert.py ├── test_modeling_bert_generation.py ├── test_modeling_big_bird.py ├── test_modeling_bigbird_pegasus.py ├── test_modeling_blenderbot.py ├── test_modeling_blenderbot_small.py ├── test_modeling_bort.py ├── test_modeling_camembert.py ├── test_modeling_clip.py ├── test_modeling_common.py ├── test_modeling_convbert.py ├── test_modeling_ctrl.py ├── test_modeling_deberta.py ├── test_modeling_deberta_v2.py ├── test_modeling_deit.py ├── test_modeling_detr.py ├── test_modeling_distilbert.py ├── test_modeling_dpr.py ├── test_modeling_electra.py ├── test_modeling_encoder_decoder.py ├── test_modeling_flaubert.py ├── test_modeling_flax_bart.py ├── test_modeling_flax_bert.py ├── test_modeling_flax_big_bird.py ├── test_modeling_flax_clip.py ├── test_modeling_flax_common.py ├── test_modeling_flax_electra.py ├── test_modeling_flax_gpt2.py ├── test_modeling_flax_roberta.py ├── test_modeling_flax_t5.py ├── test_modeling_flax_vit.py ├── test_modeling_fsmt.py ├── test_modeling_funnel.py ├── test_modeling_gpt2.py ├── test_modeling_gpt_neo.py ├── test_modeling_hubert.py ├── test_modeling_ibert.py ├── test_modeling_layoutlm.py ├── test_modeling_led.py ├── test_modeling_longformer.py ├── test_modeling_luke.py ├── test_modeling_lxmert.py ├── test_modeling_m2m_100.py ├── test_modeling_marian.py ├── test_modeling_mbart.py ├── test_modeling_megatron_bert.py ├── test_modeling_megatron_gpt2.py ├── test_modeling_mobilebert.py ├── test_modeling_mpnet.py ├── test_modeling_mt5.py ├── test_modeling_openai.py ├── test_modeling_pegasus.py ├── test_modeling_prophetnet.py ├── test_modeling_rag.py ├── test_modeling_reformer.py ├── test_modeling_roberta.py ├── test_modeling_roformer.py ├── test_modeling_speech_to_text.py ├── test_modeling_squeezebert.py ├── test_modeling_t5.py ├── test_modeling_tapas.py ├── test_modeling_tf_albert.py ├── test_modeling_tf_auto.py ├── test_modeling_tf_bart.py ├── test_modeling_tf_bert.py ├── test_modeling_tf_blenderbot.py ├── test_modeling_tf_blenderbot_small.py ├── test_modeling_tf_bort.py ├── test_modeling_tf_camembert.py ├── test_modeling_tf_common.py ├── test_modeling_tf_convbert.py ├── test_modeling_tf_ctrl.py ├── test_modeling_tf_distilbert.py ├── test_modeling_tf_dpr.py ├── test_modeling_tf_electra.py ├── test_modeling_tf_flaubert.py ├── test_modeling_tf_funnel.py ├── test_modeling_tf_gpt2.py ├── test_modeling_tf_layoutlm.py ├── test_modeling_tf_led.py ├── test_modeling_tf_longformer.py ├── test_modeling_tf_lxmert.py ├── test_modeling_tf_marian.py ├── test_modeling_tf_mbart.py ├── test_modeling_tf_mobilebert.py ├── test_modeling_tf_mpnet.py ├── test_modeling_tf_mt5.py ├── test_modeling_tf_openai.py ├── test_modeling_tf_pegasus.py ├── test_modeling_tf_pytorch.py ├── test_modeling_tf_rag.py ├── test_modeling_tf_roberta.py ├── test_modeling_tf_roformer.py ├── test_modeling_tf_t5.py ├── 
test_modeling_tf_transfo_xl.py ├── test_modeling_tf_wav2vec2.py ├── test_modeling_tf_xlm.py ├── test_modeling_tf_xlm_roberta.py ├── test_modeling_tf_xlnet.py ├── test_modeling_transfo_xl.py ├── test_modeling_visual_bert.py ├── test_modeling_vit.py ├── test_modeling_wav2vec2.py ├── test_modeling_xlm.py ├── test_modeling_xlm_prophetnet.py ├── test_modeling_xlm_roberta.py ├── test_modeling_xlnet.py ├── test_offline.py ├── test_onnx.py ├── test_optimization.py ├── test_optimization_tf.py ├── test_pipelines_automatic_speech_recognition.py ├── test_pipelines_common.py ├── test_pipelines_conversational.py ├── test_pipelines_feature_extraction.py ├── test_pipelines_fill_mask.py ├── test_pipelines_image_classification.py ├── test_pipelines_question_answering.py ├── test_pipelines_summarization.py ├── test_pipelines_table_question_answering.py ├── test_pipelines_text2text_generation.py ├── test_pipelines_text_classification.py ├── test_pipelines_text_generation.py ├── test_pipelines_token_classification.py ├── test_pipelines_translation.py ├── test_pipelines_zero_shot.py ├── test_processor_clip.py ├── test_processor_speech_to_text.py ├── test_processor_wav2vec2.py ├── test_retrieval_rag.py ├── test_sequence_feature_extraction_common.py ├── test_skip_decorators.py ├── test_tokenization_albert.py ├── test_tokenization_auto.py ├── test_tokenization_bart.py ├── test_tokenization_barthez.py ├── test_tokenization_bert.py ├── test_tokenization_bert_generation.py ├── test_tokenization_bert_japanese.py ├── test_tokenization_bertweet.py ├── test_tokenization_big_bird.py ├── test_tokenization_blenderbot.py ├── test_tokenization_byt5.py ├── test_tokenization_camembert.py ├── test_tokenization_clip.py ├── test_tokenization_common.py ├── test_tokenization_cpm.py ├── test_tokenization_ctrl.py ├── test_tokenization_deberta.py ├── test_tokenization_deberta_v2.py ├── test_tokenization_distilbert.py ├── test_tokenization_dpr.py ├── test_tokenization_fast.py ├── test_tokenization_fsmt.py ├── test_tokenization_funnel.py ├── test_tokenization_gpt2.py ├── test_tokenization_herbert.py ├── test_tokenization_layoutlm.py ├── test_tokenization_luke.py ├── test_tokenization_lxmert.py ├── test_tokenization_m2m_100.py ├── test_tokenization_marian.py ├── test_tokenization_mbart.py ├── test_tokenization_mbart50.py ├── test_tokenization_mpnet.py ├── test_tokenization_openai.py ├── test_tokenization_pegasus.py ├── test_tokenization_phobert.py ├── test_tokenization_prophetnet.py ├── test_tokenization_rag.py ├── test_tokenization_reformer.py ├── test_tokenization_roberta.py ├── test_tokenization_roformer.py ├── test_tokenization_small_blenderbot.py ├── test_tokenization_speech_to_text.py ├── test_tokenization_squeezebert.py ├── test_tokenization_t5.py ├── test_tokenization_tapas.py ├── test_tokenization_transfo_xl.py ├── test_tokenization_utils.py ├── test_tokenization_wav2vec2.py ├── test_tokenization_xlm.py ├── test_tokenization_xlm_prophetnet.py ├── test_tokenization_xlm_roberta.py ├── test_tokenization_xlnet.py ├── test_trainer.py ├── test_trainer_callback.py ├── test_trainer_distributed.py ├── test_trainer_seq2seq.py ├── test_trainer_tpu.py ├── test_trainer_utils.py ├── test_utils_check_copies.py └── test_versions_utils.py ├── tride ├── controller.py ├── notebook │ └── vis_sentiment_neuron.ipynb ├── openai_sentiment_neuron │ ├── __init__.py │ ├── encoder.py │ ├── sst_binary_demo.py │ └── utils.py └── scripts │ ├── encode_sst.py │ ├── encode_text.py │ ├── generate_text.py │ ├── sklearn_lr_sst.py │ └── visualize_hid.py ├── utils ├── 
check_copies.py ├── check_dummies.py ├── check_inits.py ├── check_repo.py ├── check_table.py ├── check_tf_ops.py ├── class_mapping_update.py ├── custom_init_isort.py ├── download_glue_data.py ├── get_modified_files.py ├── link_tester.py ├── notification_service.py ├── release.py ├── style_doc.py └── tf_ops │ └── onnx.json └── valohai.yaml

/.coveragerc:
--------------------------------------------------------------------------------
[run]
source=transformers
omit =
    # skip conversion scripts from testing for now
    */convert_*
    */__main__.py
[report]
exclude_lines =
    pragma: no cover
    raise
    except
    register_parameter
--------------------------------------------------------------------------------
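The `[run]` section scopes measurement to the `transformers` package and skips the conversion scripts; `[report]` suppresses defensive lines from the totals. A minimal sketch of exercising this config programmatically with coverage.py — the import under measurement is illustrative, CI would normally run the test suite instead:

```python
import coverage

# Sketch, not the project's CI invocation: load the .coveragerc above,
# measure some code, and print a report that honors its settings.
cov = coverage.Coverage(config_file=".coveragerc")
cov.start()

import transformers  # stand-in for the code paths a real test run would touch

cov.stop()
cov.save()
cov.report()  # omits */convert_* and */__main__.py per the [run] section
```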
/.gitattributes:
--------------------------------------------------------------------------------
*.py eol=lf
*.rst eol=lf
*.md eol=lf
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/---new-benchmark.md:
--------------------------------------------------------------------------------
---
name: "\U0001F5A5 New benchmark"
about: Benchmark a part of this library and share your results
title: "[Benchmark]"
labels: ''
assignees: ''

---

# 🖥 Benchmarking `transformers`

## Benchmark

Which part of `transformers` did you benchmark?

## Set-up

What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use?

## Results

Put your results here!
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/--new-model-addition.md:
--------------------------------------------------------------------------------
---
name: "\U0001F31F New model addition"
about: Submit a proposal/request to implement a new Transformer-based model
title: ''
labels: New model
assignees: ''

---

# 🌟 New model addition

## Model description

## Open source status

* [ ] the model implementation is available: (give details)
* [ ] the model weights are available: (give details)
* [ ] who are the authors: (mention them, if possible by @gh-username)
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680 Feature request"
about: Submit a proposal/request for a new transformers feature
title: ''
labels: ''
assignees: ''

---

# 🚀 Feature request

## Motivation

## Your contribution
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question-help.md:
--------------------------------------------------------------------------------
---
name: "❓ Questions & Help"
about: Post your general questions on the Hugging Face forum: https://discuss.huggingface.co/
title: ''
labels: ''
assignees: ''

---

# ❓ Questions & Help

## Details

**A link to original question on the forum**:
--------------------------------------------------------------------------------
/.github/conda/build.sh:
--------------------------------------------------------------------------------
$PYTHON setup.py install  # Python command to install the script.
--------------------------------------------------------------------------------
/.github/conda/meta.yaml:
--------------------------------------------------------------------------------
{% set name = "transformers" %}

package:
  name: "{{ name|lower }}"
  version: "{{ TRANSFORMERS_VERSION }}"

source:
  path: ../../

build:
  noarch: python

requirements:
  host:
    - python
    - pip
    - numpy >=1.17
    - dataclasses
    - importlib_metadata
    - huggingface_hub
    - packaging
    - filelock
    - requests
    - tqdm >=4.27
    - sacremoses
    - regex !=2019.12.17
    - protobuf
    - tokenizers >=0.10.1,<0.11.0
    - pyyaml
  run:
    - python
    - numpy >=1.17
    - dataclasses
    - importlib_metadata
    - huggingface_hub
    - packaging
    - filelock
    - requests
    - tqdm >=4.27
    - sacremoses
    - regex !=2019.12.17
    - protobuf
    - tokenizers >=0.10.1,<0.11.0
    - pyyaml

test:
  imports:
    - transformers

about:
  home: https://huggingface.co
  license: Apache License 2.0
  license_file: LICENSE
  summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0."
--------------------------------------------------------------------------------
/.github/workflows/github-torch-hub.yml:
--------------------------------------------------------------------------------
name: Torch hub integration

on:
  push:
    branches:
      - "*"

jobs:
  torch_hub_integration:
    runs-on: ubuntu-latest
    env:
      # TODO quickfix but may need more investigation
      ACTIONS_ALLOW_UNSECURE_COMMANDS: True
    steps:
      # no checkout necessary here.
      - name: Extract branch name
        run: echo "::set-env name=BRANCH::${GITHUB_REF#refs/heads/}"
      - name: Check branch name
        run: echo $BRANCH
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.7

      - name: Loading cache
        uses: actions/cache@v2
        id: cache
        with:
          path: ~/.cache/pip
          key: v0-torch_hub-${{ hashFiles('setup.py') }}

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          # install torch-hub specific dependencies
          pip install -e git+https://github.com/huggingface/transformers.git#egg=transformers[torchhub]
          # no longer needed
          pip uninstall -y transformers

      #- name: Torch hub list
      #  run: |
      #    python -c "import torch; print(torch.hub.list('huggingface/transformers:$BRANCH'))"

      #- name: Torch hub help
      #  run: |
      #    python -c "import torch; print(torch.hub.help('huggingface/transformers:$BRANCH', 'modelForSequenceClassification'))"
--------------------------------------------------------------------------------
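The two commented-out steps show the integration this workflow was meant to exercise: entry points exposed by the repository's `hubconf.py`. A hedged sketch of the same calls from a Python session — the branch name and the `bert-base-uncased` checkpoint are illustrative assumptions, only the `modelForSequenceClassification` entry point comes from the steps above:

```python
import torch

# Entry points exposed by hubconf.py at the repo root (branch name illustrative).
print(torch.hub.list("huggingface/transformers:master"))

# Docstring of one entry point, as in the commented-out CI step above.
print(torch.hub.help("huggingface/transformers:master", "modelForSequenceClassification"))

# Loading through the entry point; the checkpoint name here is an assumption.
model = torch.hub.load(
    "huggingface/transformers:master",
    "modelForSequenceClassification",
    "bert-base-uncased",
)
```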
/.github/workflows/release-conda.yml:
--------------------------------------------------------------------------------
name: Release - Conda

on:
  push:
    tags:
      - v*
    branches:
      - conda_*

env:
  ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }}

jobs:
  build_and_package:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v1

      - name: Install miniconda
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          auto-activate-base: false
          python-version: 3.8
          activate-environment: "build-transformers"
          channels: huggingface

      - name: Setup conda env
        run: |
          conda install -c defaults anaconda-client conda-build

      - name: Extract version
        run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV

      - name: Build conda packages
        run: |
          conda info
          conda list
          conda-build .github/conda

      - name: Upload to Anaconda
        run: anaconda upload `conda-build .github/conda --output` --force
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
name: Stale Bot

on:
  schedule:
    - cron: "0 15 * * *"

jobs:
  close_stale_issues:
    name: Close Stale Issues
    if: github.repository == 'huggingface/transformers'
    runs-on: ubuntu-latest
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.7

      - name: Install requirements
        run: |
          pip install PyGithub
      - name: Close stale issues
        run: |
          python scripts/stale.py
--------------------------------------------------------------------------------
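The `scripts/stale.py` the workflow invokes is not reproduced in this dump. A minimal sketch of the kind of logic such a bot needs, using the PyGithub package the workflow installs — the repository name is real, while the 30-day threshold, the comment text, and closing behavior are illustrative assumptions:

```python
import os
from datetime import datetime, timedelta

from github import Github  # PyGithub, installed by the workflow above

gh = Github(os.environ["GITHUB_TOKEN"])
repo = gh.get_repo("huggingface/transformers")
cutoff = datetime.utcnow() - timedelta(days=30)  # illustrative threshold

for issue in repo.get_issues(state="open"):
    # get_issues also yields pull requests; skip them, and compare
    # PyGithub's naive UTC timestamps against the cutoff.
    if issue.pull_request is None and issue.updated_at < cutoff:
        issue.create_comment("This issue has been automatically marked as stale.")
        issue.edit(state="closed")
```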
/MANIFEST.in:
--------------------------------------------------------------------------------
include LICENSE
--------------------------------------------------------------------------------
/docker/transformers-cpu/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        tensorflow-cpu \
        torch

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-gpu/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        tensorflow \
        torch

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
    python3 setup.py install && \
    pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-pytorch-cpu/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        torch

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-pytorch-gpu/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        mkl \
        torch

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
    python3 setup.py install && \
    pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
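Both GPU images build NVIDIA apex from source with its C++/CUDA extensions enabled. A sketch of the mixed-precision training loop apex was typically installed for — the model, optimizer, and opt level here are illustrative, not anything these Dockerfiles pin down:

```python
import torch
from apex import amp  # built from source in the images above

# Illustrative model/optimizer; apex patches them for mixed precision.
model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

loss = model(torch.randn(4, 10, device="cuda")).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:  # loss scaling for fp16
    scaled_loss.backward()
optimizer.step()
```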
/docker/transformers-pytorch-tpu/bert-base-cased.jsonnet:
--------------------------------------------------------------------------------
local base = import 'templates/base.libsonnet';
local tpus = import 'templates/tpus.libsonnet';
local utils = import "templates/utils.libsonnet";
local volumes = import "templates/volumes.libsonnet";

local bertBaseCased = base.BaseTest {
  frameworkPrefix: "hf",
  modelName: "bert-base-cased",
  mode: "example",
  configMaps: [],

  timeout: 3600, # 1 hour, in seconds

  image: std.extVar('image'),
  imageTag: std.extVar('image-tag'),

  tpuSettings+: {
    softwareVersion: "pytorch-nightly",
  },
  accelerator: tpus.v3_8,

  volumeMap+: {
    datasets: volumes.PersistentVolumeSpec {
      name: "huggingface-cluster-disk",
      mountPath: "/datasets",
    },
  },
  command: utils.scriptCommand(
    |||
      python -m pytest -s transformers/examples/pytorch/test_xla_examples.py -v
      test_exit_code=$?
      echo "\nFinished running commands.\n"
      test $test_exit_code -eq 0
    |||
  ),
};

bertBaseCased.oneshotJob
--------------------------------------------------------------------------------
/docker/transformers-pytorch-tpu/dataset.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
kind: PersistentVolume
metadata:
  name: huggingface-cluster-disk
spec:
  storageClassName: ""
  capacity:
    storage: 500Gi
  accessModes:
    - ReadOnlyMany
  claimRef:
    namespace: default
    name: huggingface-cluster-disk-claim
  gcePersistentDisk:
    pdName: huggingface-cluster-disk
    fsType: ext4
    readOnly: true
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: huggingface-cluster-disk-claim
spec:
  # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass.
  # A nil storageClassName value uses the default StorageClass. For details, see
  # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1
  storageClassName: ""
  accessModes:
    - ReadOnlyMany
  resources:
    requests:
      storage: 1Ki
--------------------------------------------------------------------------------
/docker/transformers-pytorch-tpu/docker-entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash
source ~/.bashrc
echo "running docker-entrypoint.sh"
conda activate container
echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS
echo "printed TPU info"
export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}"
exec "$@"
--------------------------------------------------------------------------------
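The entrypoint's only real work is deriving `XRT_TPU_CONFIG` from the TPU endpoint Kubernetes injects (`${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}` strips the leading `grpc://`). A minimal smoke test of the resulting setup, assuming `torch_xla` is available in the activated `container` env:

```python
# Sketch: verify the XRT_TPU_CONFIG exported by docker-entrypoint.sh is picked up.
import torch
import torch_xla.core.xla_model as xm

device = xm.xla_device()              # resolves a TPU core via XRT_TPU_CONFIG
t = torch.randn(2, 2, device=device)  # ops are staged on the XLA device
xm.mark_step()                        # flush the pending XLA graph
print(t.device)                       # e.g. xla:1
```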
For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" # ":7" strips the leading "grpc://" scheme from the endpoint address 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } 13 | 14 | .highlight .gp { 15 | color: #FB8D68; 16 | } -------------------------------------------------------------------------------- /docs/source/contributing.md: -------------------------------------------------------------------------------- 1 | ../../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/favicon.ico -------------------------------------------------------------------------------- /docs/source/imgs/course_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/course_banner.png -------------------------------------------------------------------------------- /docs/source/imgs/local_attention_mask.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/local_attention_mask.png -------------------------------------------------------------------------------- /docs/source/imgs/ppl_chunked.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_chunked.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_full.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_full.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_sliding.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_sliding.gif -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/transformers_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/transformers_overview.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2020 The HuggingFace Team. All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with 5 | the License. You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on 10 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the 11 | specific language governing permissions and limitations under the License. 12 | 13 | Configuration 14 | ----------------------------------------------------------------------------------------------------------------------- 15 | 16 | The base class :class:`~transformers.PretrainedConfig` implements the common methods for loading/saving a configuration 17 | either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded 18 | from HuggingFace's AWS S3 repository). 19 | 20 | 21 | PretrainedConfig 22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | .. autoclass:: transformers.PretrainedConfig 25 | :members: 26 | -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /docs/source/troubleshooting.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Troubleshooting 18 | 19 | This document is meant to help you find solutions to common problems. 20 | 21 | ## Firewalled environments 22 | 23 | Some cloud and intranet setups have their GPU instances firewalled to the outside world, so if your script is trying to download model weights or datasets it will first hang and then time out with an error message like: 24 | 25 | ``` 26 | ValueError: Connection error, and we cannot find the requested files in the cached path. 27 | Please try again or make sure your Internet connection is on. 28 | ``` 29 | 30 | One possible solution in this situation is to use the ["offline-mode"](https://huggingface.co/transformers/installation.html#offline-mode).
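As a minimal sketch of that approach (assuming the model weights and datasets were already downloaded into the local cache while a connection was available; the training command shown is an illustrative placeholder for whatever you normally run):

```bash
# Tell transformers and datasets to read everything from the local cache and
# never attempt a network call; a missing file should then surface an error
# immediately instead of hanging on a network timeout.
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1

python examples/pytorch/translation/run_translation.py --model_name_or_path t5-small ...
```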
31 | -------------------------------------------------------------------------------- /examples/flax/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.4 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/flax/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.4 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/legacy/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Legacy examples 18 | 19 | This folder contains examples which are not actively maintained (mostly contributed by the community). 20 | 21 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working. 22 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | ray 23 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_glue.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size $BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_ner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # for seqeval metrics import 4 | pip install -r ../requirements.txt 5 | 6 | ## The relevant files are currently on a shared Google 7 | ## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J 8 | ## Monitor for changes and eventually migrate to nlp dataset 9 | curl -L
'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \ 10 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 11 | curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \ 12 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 13 | curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \ 14 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 15 | 16 | export MAX_LENGTH=128 17 | export BERT_MODEL=bert-base-multilingual-cased 18 | python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 19 | python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 20 | python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 21 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 22 | export BATCH_SIZE=32 23 | export NUM_EPOCHS=3 24 | export SEED=1 25 | 26 | export OUTPUT_DIR_NAME=germeval-model 27 | export CURRENT_DIR=${PWD} 28 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 29 | mkdir -p $OUTPUT_DIR 30 | 31 | # Add parent directory to python path to access lightning_base.py 32 | export PYTHONPATH="../":"${PYTHONPATH}" 33 | 34 | python3 run_ner.py --data_dir ./ \ 35 | --labels ./labels.txt \ 36 | --model_name_or_path $BERT_MODEL \ 37 | --output_dir $OUTPUT_DIR \ 38 | --max_seq_length $MAX_LENGTH \ 39 | --num_train_epochs $NUM_EPOCHS \ 40 | --train_batch_size $BATCH_SIZE \ 41 | --seed $SEED \ 42 | --gpus 1 \ 43 | --do_train \ 44 | --do_predict 45 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_pos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/convert_model_to_fp16.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import Union 17 | 18 | import fire 19 | import torch 20 | from tqdm import tqdm 21 | 22 | 23 | def convert(src_path: str, map_location: str = "cpu", save_path: Union[str, None] = None) -> None: 24 | """Convert a pytorch_model.bin or model.pt file to torch.float16 for faster downloads, less disk space.""" 25 | state_dict = torch.load(src_path, map_location=map_location) 26 | for k, v in tqdm(state_dict.items()): 27 | if not isinstance(v, torch.Tensor): 28 | raise TypeError("FP16 conversion only works on paths that are saved state dicts, like pytorch_model.bin") 29 | state_dict[k] = v.half() 30 | if save_path is None: # overwrite src_path 31 | save_path = src_path 32 | torch.save(state_dict, save_path) 33 | 34 | 35 | if __name__ == "__main__": 36 | fire.Fire(convert) 37 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 16 | # run ./finetune.sh --help to see all the possible options 17 | python finetune_trainer.py \ 18 | --learning_rate=3e-5 \ 19 | --fp16 \ 20 | --do_train --do_eval --do_predict \ 21 | --evaluation_strategy steps \ 22 | --predict_with_generate \ 23 | --n_val 1000 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export TPU_NUM_CORES=8 16 | 17 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 18 | # run ./finetune_tpu.sh --help to see all the possible options 19 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 20 | finetune_trainer.py \ 21 | --learning_rate=3e-5 \ 22 | --do_train --do_eval \ 23 | --evaluation_strategy steps \ 24 | --prediction_loss_only \ 25 | --n_val 1000 \ 26 | "$@" 27 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from pathlib import Path 17 | 18 | import fire 19 | 20 | 21 | def minify(src_dir: str, dest_dir: str, n: int): 22 | """Write first n lines of each file f in src_dir to dest_dir/f""" 23 | src_dir = Path(src_dir) 24 | dest_dir = Path(dest_dir) 25 | dest_dir.mkdir(exist_ok=True) 26 | for path in src_dir.iterdir(): 27 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 28 | dest_path = dest_dir.joinpath(path.name) 29 | print(dest_path) 30 | dest_path.open("w").write("\n".join(new)) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(minify) 35 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/old_test_tatoeba_conversion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import tempfile 17 | import unittest 18 | 19 | from transformers.file_utils import cached_property 20 | from transformers.models.marian.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter 21 | from transformers.testing_utils import slow 22 | 23 | 24 | @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") 25 | class TatoebaConversionTester(unittest.TestCase): 26 | @cached_property 27 | def resolver(self): 28 | tmp_dir = tempfile.mkdtemp() 29 | return TatoebaConverter(save_dir=tmp_dir) 30 | 31 | @slow 32 | def test_resolver(self): 33 | self.resolver.convert_models(["heb-eng"]) 34 | 35 | @slow 36 | def test_model_card(self): 37 | content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) 38 | assert mmeta["long_pair"] == "heb-eng" 39 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import fire 16 | 17 | from utils import calculate_rouge, save_json 18 | 19 | 20 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 21 | """Kwargs will be passed to calculate_rouge""" 22 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 23 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 24 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 25 | if save_path is not None: 26 | save_json(metrics, save_path, indent=None) 27 | return metrics # these print nicely 28 | 29 | 30 | if __name__ == "__main__": 31 | fire.Fire(calculate_rouge_path) 32 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/save_randomly_initialized_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import fire 17 | 18 | from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer 19 | 20 | 21 | def save_randomly_initialized_version(config_name: str, save_dir: str, **config_kwargs): 22 | """Save a randomly initialized version of a model using a pretrained config. 23 | Args: 24 | config_name: which config to use 25 | save_dir: where to save the resulting model and tokenizer 26 | config_kwargs: Passed to AutoConfig 27 | 28 | Usage:: 29 | save_randomly_initialized_version("facebook/bart-large-cnn", "distilbart_random_cnn_6_3", encoder_layers=6, decoder_layers=3, num_beams=3) 30 | """ 31 | cfg = AutoConfig.from_pretrained(config_name, **config_kwargs) 32 | model = AutoModelForSeq2SeqLM.from_config(cfg) 33 | model.save_pretrained(save_dir) 34 | AutoTokenizer.from_pretrained(config_name).save_pretrained(save_dir) 35 | return model 36 | 37 | 38 | if __name__ == "__main__": 39 | fire.Fire(save_randomly_initialized_version) 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from filelock import FileLock 17 | 18 | 19 | try: 20 | import nltk 21 | 22 | NLTK_AVAILABLE = True 23 | except (ImportError, ModuleNotFoundError): 24 | NLTK_AVAILABLE = False 25 | 26 | if NLTK_AVAILABLE: 27 | with FileLock(".lock") as lock: 28 | nltk.download("punkt", quiet=True) 29 | 30 | 31 | def add_newline_to_end_of_each_sentence(x: str) -> str: 32 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 33 | x = re.sub("<n>", "", x) # remove pegasus newline char 34 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)" 35 | return "\n".join(nltk.sent_tokenize(x)) 36 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/test_data: -------------------------------------------------------------------------------- 1 | seq2seq/test_data -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/legacy/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/legacy/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distil_marian_enro.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The
HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export WANDB_PROJECT=distil-marian 16 | export BS=64 17 | export GAS=1 18 | export m=sshleifer/student_marian_en_ro_6_3 19 | export MAX_LEN=128 MAX_TGT_LEN=128 20 | python finetune_trainer.py \ 21 | --tokenizer_name $m --model_name_or_path $m \ 22 | --data_dir $ENRO_DIR \ 23 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 24 | --learning_rate=3e-4 \ 25 | --warmup_steps 500 --sortish_sampler \ 26 | --fp16 \ 27 | --gradient_accumulation_steps=$GAS \ 28 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 29 | --freeze_encoder --freeze_embeds \ 30 | --num_train_epochs=6 \ 31 | --save_steps 3000 --eval_steps 3000 \ 32 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ 33 | --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 34 | --do_train --do_eval --do_predict \ 35 | --evaluation_strategy steps \ 36 | --predict_with_generate --logging_first_step \ 37 | --task translation --label_smoothing_factor 0.1 \ 38 | "$@" 39 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | export WANDB_PROJECT=distil-marian 16 | export BS=64 17 | export m=sshleifer/student_marian_en_ro_6_3 18 | export MAX_LEN=128 MAX_TGT_LEN=128 19 | export TPU_NUM_CORES=8 20 | 21 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 22 | finetune_trainer.py \ 23 | --tokenizer_name $m --model_name_or_path $m \ 24 | --data_dir $ENRO_DIR \ 25 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 26 | --learning_rate=3e-4 \ 27 | --warmup_steps 500 \ 28 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 29 | --freeze_encoder --freeze_embeds \ 30 | --num_train_epochs=6 \ 31 | --save_steps 500 --eval_steps 500 \ 32 | --logging_first_step --logging_steps 200 \ 33 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ 34 | --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 35 | --do_train --do_eval \ 36 | --evaluation_strategy steps \ 37 | --prediction_loss_only \ 38 | --task translation --label_smoothing_factor 0.1 \ 39 | "$@" 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export WANDB_PROJECT=distilbart-trainer 16 | export BS=32 17 | export m=sshleifer/student_cnn_12_6 18 | export tok=facebook/bart-large 19 | export MAX_TGT_LEN=142 20 | 21 | python finetune_trainer.py \ 22 | --model_name_or_path $m --tokenizer_name $tok \ 23 | --data_dir cnn_dm \ 24 | --output_dir distilbart-cnn-12-6 --overwrite_output_dir \ 25 | --learning_rate=3e-5 \ 26 | --warmup_steps 500 --sortish_sampler \ 27 | --fp16 \ 28 | --n_val 500 \ 29 | --gradient_accumulation_steps=1 \ 30 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 31 | --freeze_encoder --freeze_embeds \ 32 | --num_train_epochs=2 \ 33 | --save_steps 3000 --eval_steps 3000 \ 34 | --logging_first_step \ 35 | --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 36 | --do_train --do_eval --do_predict \ 37 | --evaluation_strategy steps \ 38 | --predict_with_generate --sortish_sampler \ 39 | "$@" 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python finetune_trainer.py \ 16 | --model_name_or_path=facebook/mbart-large-cc25 \ 17 | --data_dir $ENRO_DIR \ 18 | --output_dir mbart_cc25_enro --overwrite_output_dir \ 19 | --learning_rate=3e-5 \ 20 | --warmup_steps 500 \ 21 | --fp16 \ 22 | --label_smoothing 0.1 \ 23 | --adam_eps 1e-06 \ 24 | --src_lang en_XX --tgt_lang ro_RO \ 25 | --freeze_embeds \ 26 | --per_device_train_batch_size=4 --per_device_eval_batch_size=4 \ 27 | --max_source_length 128 --max_target_length 128 --val_max_target_length 128 --test_max_target_length 128\ 28 | --sortish_sampler \ 29 | --num_train_epochs 6 \ 30 | --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \ 31 | --do_train --do_eval --do_predict \ 32 | --evaluation_strategy steps \ 33 | --predict_with_generate --logging_first_step \ 34 | --task translation \ 35 | "$@" 36 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | ## The relevant files are currently on a shared Google 2 | ## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J 3 | ## Monitor for changes and eventually migrate to nlp dataset 4 | curl -L 'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \ 5 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 6 | curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \ 7 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 8 | curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \ 9 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 10 | 11 | export MAX_LENGTH=128 12 | export BERT_MODEL=bert-base-multilingual-cased 13 | python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 14 | python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 15 | python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 16 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 17 | export OUTPUT_DIR=germeval-model 18 | export BATCH_SIZE=32 19 | export NUM_EPOCHS=3 20 | export SAVE_STEPS=750 21 | export SEED=1 22 | 23 | python3 run_ner.py \ 24 | --task_type NER \ 25 | --data_dir . \ 26 | --labels ./labels.txt \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 
3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type Chunk \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type POS \ 26 | --data_dir . 
\ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /examples/pytorch/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu >= 1.4.12 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/pytorch/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # 🤗 Benchmark results 18 | 19 | Here, you can find a list of the different benchmark results created by the community. 20 | 21 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add it below. 
22 | 23 | | Benchmark description | Results | Environment info | Author | 24 | |:----------|:-------------|:-------------|------:| 25 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 26 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 27 | -------------------------------------------------------------------------------- /examples/pytorch/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.3 -------------------------------------------------------------------------------- /examples/pytorch/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | 29 | # silence FutureWarning warnings in tests since often we can't act on them until 30 | # they become normal warnings - i.e.
the tests still need to test the current functionality 31 | warnings.simplefilter(action="ignore", category=FutureWarning) 32 | 33 | 34 | def pytest_addoption(parser): 35 | from transformers.testing_utils import pytest_addoption_shared 36 | 37 | pytest_addoption_shared(parser) 38 | 39 | 40 | def pytest_terminal_summary(terminalreporter): 41 | from transformers.testing_utils import pytest_terminal_summary_main 42 | 43 | make_reports = terminalreporter.config.getoption("--make-reports") 44 | if make_reports: 45 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 46 | -------------------------------------------------------------------------------- /examples/pytorch/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.3 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | accelerate launch run_swag_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name swag \ 18 | --output_dir /tmp/test-swag-no-trainer \ 19 | --pad_to_max_length 20 | -------------------------------------------------------------------------------- /examples/pytorch/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.3.0 3 | -------------------------------------------------------------------------------- /examples/pytorch/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | rouge-score 5 | nltk 6 | py7zr 7 | torch >= 1.3 8 | -------------------------------------------------------------------------------- /examples/pytorch/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | torch >= 1.3 6 | -------------------------------------------------------------------------------- /examples/pytorch/text-generation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Language generation 18 | 19 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/pytorch/text-generation/run_generation.py). 
20 | 21 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 22 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you 23 | can try out the different models available in the library. 24 | 25 | Example usage: 26 | 27 | ```bash 28 | python run_generation.py \ 29 | --model_type=gpt2 \ 30 | --model_name_or_path=gpt2 31 | ``` 32 | -------------------------------------------------------------------------------- /examples/pytorch/text-generation/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.8.0 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python3 run_ner.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --do_train \ 20 | --do_eval 21 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | accelerate launch run_ner_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --pad_to_max_length \ 20 | --task_name ner \ 21 | --return_entity_level_metrics 22 | -------------------------------------------------------------------------------- /examples/pytorch/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | sacrebleu >= 1.4.12 5 | py7zr 6 | torch >= 1.3 7 | -------------------------------------------------------------------------------- /examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Research projects 18 | 19 | This folder contains various research projects using 🤗 Transformers. They are not maintained and require a specific 20 | version of 🤗 Transformers that is indicated in the requirements file of each folder. Updating them to the most recent version of the library will require some work. 21 | 22 | To use any of them, just run the command 23 | ``` 24 | pip install -r requirements.txt 25 | ``` 26 | inside the folder of your choice. 27 | 28 | If you need help with any of those, contact the author(s), indicated at the top of the `README` of each folder. 29 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/README.md: -------------------------------------------------------------------------------- 1 | ## Adversarial evaluation of model performance 2 | 3 | Here is an example of evaluating a model using adversarial evaluation of natural language inference with the Heuristic Analysis for NLI Systems (HANS) dataset [McCoy et al., 2019](https://arxiv.org/abs/1902.01007). The example was graciously provided by [Nafise Sadat Moosavi](https://github.com/ns-moosavi). 4 | 5 | The HANS dataset can be downloaded from [this location](https://github.com/tommccoy1/hans). 6 | 7 | This is an example of using `run_hans.py`: 8 | 9 | ```bash 10 | export HANS_DIR=path-to-hans 11 | export MODEL_TYPE=type-of-the-model-e.g.-bert-roberta-xlnet-etc 12 | export MODEL_PATH=path-to-the-model-directory-that-is-trained-on-NLI-e.g.-by-using-run_glue.py 13 | 14 | python run_hans.py \ 15 | --task_name hans \ 16 | --model_type $MODEL_TYPE \ 17 | --do_eval \ 18 | --data_dir $HANS_DIR \ 19 | --model_name_or_path $MODEL_PATH \ 20 | --max_seq_length 128 \ 21 | --output_dir $MODEL_PATH 22 | ``` 23 | 24 | This will create the `hans_predictions.txt` file in `MODEL_PATH`, which can then be evaluated using `evaluate_heur_output.py` from the HANS repository.
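As a rough sketch, the follow-up scoring step could look like the command below. The exact interface of `evaluate_heur_output.py` belongs to the external HANS repository and is an assumption here, so check that repository for the authoritative usage:

```bash
# Hypothetical scoring step: run the HANS repo's evaluation script on the
# predictions produced above (assumes the repo was cloned to $HANS_DIR).
python $HANS_DIR/evaluate_heur_output.py $MODEL_PATH/hans_predictions.txt
```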
25 | 26 | The results of the BERT-base model, trained on MNLI with batch size 8 and random seed 42, on the HANS dataset are as follows: 27 | 28 | ```bash 29 | Heuristic entailed results: 30 | lexical_overlap: 0.9702 31 | subsequence: 0.9942 32 | constituent: 0.9962 33 | 34 | Heuristic non-entailed results: 35 | lexical_overlap: 0.199 36 | subsequence: 0.0396 37 | constituent: 0.118 38 | ``` 39 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/bert-loses-patience/pabee/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from unittest.mock import patch 5 | 6 | import run_glue_with_pabee 7 | from transformers.testing_utils import TestCasePlus 8 | 9 | 10 | logging.basicConfig(level=logging.DEBUG) 11 | 12 | logger = logging.getLogger() 13 | 14 | 15 | def get_setup_file(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-f") 18 | args = parser.parse_args() 19 | return args.f 20 | 21 | 22 | class PabeeTests(TestCasePlus): 23 | def test_run_glue(self): 24 | stream_handler = logging.StreamHandler(sys.stdout) 25 | logger.addHandler(stream_handler) 26 | 27 | tmp_dir = self.get_auto_remove_tmp_dir() 28 | testargs = f""" 29 | run_glue_with_pabee.py 30 | --model_type albert 31 | --model_name_or_path albert-base-v2 32 | --data_dir ./tests/fixtures/tests_samples/MRPC/ 33 | --output_dir {tmp_dir} 34 | --overwrite_output_dir 35 | --task_name mrpc 36 | --do_train 37 | --do_eval 38 | --per_gpu_train_batch_size=2 39 | --per_gpu_eval_batch_size=1 40 | --learning_rate=2e-5 41 | --max_steps=50 42 | --warmup_steps=2 43 | --seed=42 44 | --max_seq_length=128 45 | """.split() 46 | 47 | with patch.object(sys, "argv", testargs): 48 | result = run_glue_with_pabee.main() 49 | for value in result.values(): 50 | self.assertGreaterEqual(value, 0.75) 51 | -------------------------------------------------------------------------------- /examples/research_projects/bertabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/bertabs/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 |
-------------------------------------------------------------------------------- /examples/research_projects/bertology/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/deebert/src/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 
7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- 
/examples/research_projects/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | Author: @yjernite 4 | 5 | This folder contains the code for the Long Form Question Answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 6 | 7 | You can use these methods to train your own system by following along with the associated [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html). 8 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | faiss-cpu 3 | streamlit 4 | elasticsearch 5 | -------------------------------------------------------------------------------- /examples/research_projects/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. Make a virtualenv: ``virtualenv venv`` and activate it: ``source venv/bin/activate`` 4 | 2. Install the requirements: ``pip install -r ./requirements.txt`` 5 | 3. Usage is as shown in ``demo.ipynb`` 6 | -------------------------------------------------------------------------------- /examples/research_projects/mlm_wwm/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | ltp 5 | -------------------------------------------------------------------------------- /examples/research_projects/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/research_projects/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a multimodal dataset with around 26,000 movies including images, plots and other metadata.
6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /examples/research_projects/performer/README.md: -------------------------------------------------------------------------------- 1 | # Performer fine-tuning 2 | 3 | Example authors: @TevenLeScao, @Patrickvonplaten 4 | 5 | Paper authors: Krzysztof Choromanski, Valerii Likhosherstov, David Dohan, Xingyou Song, Andreea Gane, Tamas Sarlos, Peter Hawkins, Jared Davis, Afroz Mohiuddin, Lukasz Kaiser, David Belanger, Lucy Colwell, Adrian Weller 6 | 7 | ## Requirements 8 | 9 | `datasets`, `flax` and `jax`. `wandb` integration is built in if you want to use it. 10 | 11 | ## Examples 12 | 13 | `sanity_script.sh` will launch Performer fine-tuning from the bert-base-cased checkpoint on the Simple Wikipedia dataset (a small, easy-language English Wikipedia) from `datasets`. 14 | `full_script.sh` will launch Performer fine-tuning from the bert-large-cased checkpoint on the English Wikipedia dataset from `datasets`. 15 | 16 | Here are a few key arguments (combined into a runnable sketch after this list): 17 | - Remove the `--performer` argument to use a standard BERT model. 18 | 19 | - Add `--reinitialize` to start from a blank model rather than a BERT checkpoint. 20 | 21 | - You may change the BERT size by passing a different [checkpoint](https://huggingface.co/transformers/pretrained_models.html) to the `--model_name_or_path` argument. 22 | 23 | - Passing your username to the `--wandb_user_name` argument will trigger Weights & Biases logging. 24 | 25 | - You can choose a dataset with `--dataset_name` and `--dataset_config`. Our [viewer](https://huggingface.co/datasets/viewer/) will help you find what you need.
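As a rough sketch of how these flags fit together, the command below combines the base invocation from `sanity_script.sh` with the arguments above; the wandb username is a placeholder, and whether `--reinitialize` still expects a `--model_name_or_path` for its config is an assumption to verify against `run_mlm_performer.py`:

```bash
# Hypothetical run: Performer attention, reinitialized weights, W&B logging.
TOKENIZERS_PARALLELISM=true python run_mlm_performer.py \
    --output_dir experiments \
    --dataset_name wikipedia \
    --dataset_config_name 20200501.simple \
    --model_name_or_path bert-base-cased \
    --tokenizer_name bert-base-cased \
    --do_train \
    --overwrite_output_dir \
    --per_device_train_batch_size 4 \
    --learning_rate 5e-4 \
    --warmup_steps 100 \
    --num_train_epochs 3 \
    --performer \
    --reinitialize \
    --wandb_user_name your-username
```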
-------------------------------------------------------------------------------- /examples/research_projects/performer/full_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.en --model_name_or_path bert-large-cased --tokenizer_name bert-large-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/performer/sanity_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.simple --model_name_or_path bert-base-cased --tokenizer_name bert-base-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/research_projects/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class ClassificationHead(nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (nn.Linear(embed_size, class_size)) 13 | self.mlp = nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = nn.functional.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/research_projects/pplm/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | transformers==3.5.1 23 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/requirements.txt: 
-------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.7.0 2 | datasets >= 1.6.2 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | pytorch-lightning == 1.3.1 6 | nvidia-ml-py3 == 7.352.0 7 | ray >= 1.3.0 8 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/train.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? 9 | What does Moses' rod turn into ? 10 | Who is Aron? 11 | Where did Moses grow up ? 12 | What happens at the command of the Moses ? 13 | Who manages the Pokémon ? 14 | Who owned the Pokémon trademark ? 15 | What else include in Pokémon franchise ? 16 | How many seasons in Pokémon animme series ? 17 | What does Moses' rod turn into ? 18 | Who is Aron? 19 | Where did Moses grow up ? 20 | What happens at the command of the Moses ? 21 | Who manages the Pokémon ? 22 | Who owned the Pokémon trademark ? 23 | What else include in Pokémon franchise ? 24 | How many seasons in Pokémon animme series ? 25 | What does Moses' rod turn into ? 26 | Who is Aron? 27 | Where did Moses grow up ? 28 | What happens at the command of the Moses ? 29 | Who manages the Pokémon ? 30 | Who owned the Pokémon trademark ? 31 | What else include in Pokémon franchise ? 32 | How many seasons in Pokémon animme series ? 33 | What does Moses' rod turn into ? 34 | Who is Aron? 35 | Where did Moses grow up ? 36 | What happens at the command of the Moses ? 37 | Who manages the Pokémon ? 38 | Who owned the Pokémon trademark ? 39 | What else include in Pokémon franchise ? 40 | How many seasons in Pokémon animme series ? 41 | What does Moses' rod turn into ? 42 | Who is Aron? 43 | Where did Moses grow up ? 44 | What happens at the command of the Moses ? 45 | Who manages the Pokémon ? 46 | Who owned the Pokémon trademark ? 47 | What else include in Pokémon franchise ? 48 | How many seasons in Pokémon animme series ? 
-------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/train.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons 9 | to a snake 10 | Moses' assistant 11 | Egyptian royal court 12 | let his rod turn in to a snake 13 | The Pokémon Company 14 | Nintendo 15 | world's top-selling toy brand, the top-selling trading card game 16 | over 20 seasons 17 | to a snake 18 | Moses' assistant 19 | Egyptian royal court 20 | let his rod turn in to a snake 21 | The Pokémon Company 22 | Nintendo 23 | world's top-selling toy brand, the top-selling trading card game 24 | over 20 seasons 25 | to a snake 26 | Moses' assistant 27 | Egyptian royal court 28 | let his rod turn in to a snake 29 | The Pokémon Company 30 | Nintendo 31 | world's top-selling toy brand, the top-selling trading card game 32 | over 20 seasons 33 | to a snake 34 | Moses' assistant 35 | Egyptian royal court 36 | let his rod turn in to a snake 37 | The Pokémon Company 38 | Nintendo 39 | world's top-selling toy brand, the top-selling trading card game 40 | over 20 seasons 41 | to a snake 42 | Moses' assistant 43 | Egyptian royal court 44 | let his rod turn in to a snake 45 | The Pokémon Company 46 | Nintendo 47 | world's top-selling toy brand, the top-selling trading card game 48 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_finetune.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # Creates the custom knowledge base 5 | python use_own_knowledge_dataset.py 6 | 7 | 8 | # Start a single-node Ray cluster.
9 | ray start --head 10 | 11 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 12 | # run ./examples/rag/finetune_rag_ray.sh --help to see all the possible options 13 | 14 | 15 | 16 | python finetune_rag.py \ 17 | --model_name_or_path facebook/rag-token-base \ 18 | --model_type rag_token \ 19 | --fp16 \ 20 | --gpus 2 \ 21 | --profile \ 22 | --do_train \ 23 | --end2end \ 24 | --do_predict \ 25 | --n_val -1 \ 26 | --train_batch_size 1 \ 27 | --eval_batch_size 1 \ 28 | --max_source_length 128 \ 29 | --max_target_length 25 \ 30 | --val_max_target_length 25 \ 31 | --test_max_target_length 25 \ 32 | --label_smoothing 0.1 \ 33 | --dropout 0.1 \ 34 | --attention_dropout 0.1 \ 35 | --weight_decay 0.001 \ 36 | --adam_epsilon 1e-08 \ 37 | --max_grad_norm 0.1 \ 38 | --lr_scheduler polynomial \ 39 | --learning_rate 3e-05 \ 40 | --num_train_epochs 10 \ 41 | --warmup_steps 500 \ 42 | --gradient_accumulation_steps 1 \ 43 | --distributed_retriever ray \ 44 | --num_retrieval_workers 4 \ 45 | --index_name custom \ 46 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 47 | --index_gpus 1 \ 48 | --gpu_order [6,7,8,9,0,1,2,3,5,4] \ 49 | --indexing_freq 5 50 | 51 | 52 | 53 | # Stop the Ray cluster. 54 | ray stop 55 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_rag_new_features.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="../":"${PYTHONPATH}" 2 | 3 | python use_own_knowledge_dataset.py 4 | 5 | ray start --head 6 | python finetune_rag.py \ 7 | --model_name_or_path facebook/rag-token-base \ 8 | --model_type rag_token \ 9 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 10 | --fp16 \ 11 | --gpus 1 \ 12 | --profile \ 13 | --end2end \ 14 | --index_name custom 15 | 16 | ray stop 17 | -------------------------------------------------------------------------------- /examples/research_projects/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune_rag.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --profile \ 15 | --do_train \ 16 | --do_predict \ 17 | --n_val -1 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 \ 35 | 
-------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag_ray.sh: -------------------------------------------------------------------------------- 1 | # Sample script to finetune RAG using Ray for distributed retrieval. 2 | 3 | # Add parent directory to python path to access lightning_base.py 4 | export PYTHONPATH="../":"${PYTHONPATH}" 5 | 6 | # Start a single-node Ray cluster. 7 | ray start --head 8 | 9 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 10 | # run ./examples/rag/finetune_rag_ray.sh --help to see all the possible options 11 | 12 | python examples/rag/finetune_rag.py \ 13 | --data_dir $DATA_DIR \ 14 | --output_dir $OUTPUT_DIR \ 15 | --model_name_or_path $MODEL_NAME_OR_PATH \ 16 | --model_type rag_sequence \ 17 | --fp16 \ 18 | --gpus 8 \ 19 | --profile \ 20 | --do_train \ 21 | --do_predict \ 22 | --n_val -1 \ 23 | --train_batch_size 8 \ 24 | --eval_batch_size 1 \ 25 | --max_source_length 128 \ 26 | --max_target_length 25 \ 27 | --val_max_target_length 25 \ 28 | --test_max_target_length 25 \ 29 | --label_smoothing 0.1 \ 30 | --dropout 0.1 \ 31 | --attention_dropout 0.1 \ 32 | --weight_decay 0.001 \ 33 | --adam_epsilon 1e-08 \ 34 | --max_grad_norm 0.1 \ 35 | --lr_scheduler polynomial \ 36 | --learning_rate 3e-05 \ 37 | --num_train_epochs 100 \ 38 | --warmup_steps 500 \ 39 | --gradient_accumulation_steps 1 \ 40 | --distributed_retriever ray \ 41 | --num_retrieval_workers 4 42 | 43 | # Stop the Ray cluster. 44 | ray stop 45 | -------------------------------------------------------------------------------- /examples/research_projects/rag/parse_dpr_relevance_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script reads DPR retriever training data and parses each datapoint. We save a line per datapoint. 3 | Each line consists of the query followed by a tab-separated list of Wikipedia page titles constituting 4 | positive contexts for a given query. 
5 | """ 6 | 7 | import argparse 8 | import json 9 | 10 | from tqdm import tqdm 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser() 15 | 16 | # Required parameters 17 | parser.add_argument( 18 | "--src_path", 19 | type=str, 20 | default="biencoder-nq-dev.json", 21 | help="Path to raw DPR training data", 22 | ) 23 | parser.add_argument( 24 | "--evaluation_set", 25 | type=str, 26 | help="where to store parsed evaluation_set file", 27 | ) 28 | parser.add_argument( 29 | "--gold_data_path", 30 | type=str, 31 | help="where to store parsed gold_data_path file", 32 | ) 33 | args = parser.parse_args() 34 | 35 | with open(args.src_path, "r") as src_file, open(args.evaluation_set, "w") as eval_file, open( 36 | args.gold_data_path, "w" 37 | ) as gold_file: 38 | dpr_records = json.load(src_file) 39 | for dpr_record in tqdm(dpr_records): 40 | question = dpr_record["question"] 41 | contexts = [context["title"] for context in dpr_record["positive_ctxs"]] 42 | eval_file.write(question + "\n") 43 | gold_file.write("\t".join(contexts) + "\n") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /examples/research_projects/rag/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | transformers 6 | pytorch-lightning==1.3.1 7 | GitPython -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/_test_make_student.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | from make_student import create_student_by_copying_alternating_layers 5 | from transformers import AutoConfig 6 | from transformers.file_utils import cached_property 7 | from transformers.testing_utils import require_torch 8 | 9 | 10 | TINY_BART = "sshleifer/bart-tiny-random" 11 | TINY_T5 = "patrickvonplaten/t5-tiny-random" 12 | 13 | 14 | @require_torch 15 | class MakeStudentTester(unittest.TestCase): 16 | @cached_property 17 | def teacher_config(self): 18 | return AutoConfig.from_pretrained(TINY_BART) 19 | 20 | def test_valid_t5(self): 21 | student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1) 22 | self.assertEqual(student.config.num_hidden_layers, 1) 23 | 24 | def test_asymmetric_t5(self): 25 | student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None) 26 | 27 | def test_same_decoder_small_encoder(self): 28 | student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None) 29 | self.assertEqual(student.config.encoder_layers, 1) 30 | self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers) 31 | 32 | def test_small_enc_small_dec(self): 33 | student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1) 34 | self.assertEqual(student.config.encoder_layers, 1) 35 | self.assertEqual(student.config.decoder_layers, 1) 36 | 37 | def test_raises_assert(self): 38 | with self.assertRaises(AssertionError): 39 | create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None) 40 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_enro_teacher.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | python finetune.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --do_predict \ 9 | --fp16 \ 10 | --val_check_interval 0.25 \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --freeze_encoder --freeze_embeds \ 14 | --train_batch_size=$BS --eval_batch_size=$BS \ 15 | --tokenizer_name $m --model_name_or_path $m \ 16 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 17 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 --eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_bart_tiny.sh: 
-------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | psutil 4 | sacrebleu 5 | rouge-score 6 | tensorflow_datasets 7 | pytorch-lightning==1.0.4 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with 
FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | x = re.sub("<n>", "", x)  # remove the pegasus newline token "<n>" 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | --val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 --fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3.
\ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/ds_config_wav2vec2_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 2, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "find_unused_parameters": true, 37 | "allgather_partitions": true, 38 | "allgather_bucket_size": 2e8, 39 | "overlap_comm": true, 40 | "reduce_scatter": true, 41 | "reduce_bucket_size": 2e8, 42 | "contiguous_gradients": true 43 | }, 44 | 45 | "gradient_accumulation_steps": "auto", 46 | "gradient_clipping": "auto", 47 | "steps_per_print": 2000, 48 | "train_batch_size": "auto", 49 | "train_micro_batch_size_per_gpu": "auto", 50 | "wall_clock_breakdown": false 51 | } 52 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/ds_config_wav2vec2_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | 
"stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_fp16_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } 58 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="32" \ 6 | --per_device_eval_batch_size="32" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-base" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="20" \ 6 | --per_device_eval_batch_size="20" \ 7 | --evaluation_strategy="steps" \ 8 | --save_steps="500" \ 9 | --eval_steps="100" \ 10 | --logging_steps="50" \ 11 | --learning_rate="5e-4" \ 12 | --warmup_steps="3000" \ 13 | --model_name_or_path="facebook/wav2vec2-base" \ 14 | --fp16 \ 15 | --dataset_name="timit_asr" \ 16 | --train_split_name="train" \ 17 | --validation_split_name="test" \ 18 | --orthography="timit" \ 19 | --preprocessing_num_workers="$(nproc)" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor \ 22 | --verbose_logging \ 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="16" \ 6 | --per_device_eval_batch_size="16" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | 
--output_dir="./wav2vec2-large-lv60-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="2" \ 6 | --per_device_eval_batch_size="2" \ 7 | --gradient_accumulation_steps="4" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="timit_asr" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --orthography="timit" \ 20 | --preprocessing_num_workers="$(nproc)" \ 21 | --group_by_length \ 22 | --freeze_feature_extractor \ 23 | --verbose_logging \ 24 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-xlsr-53-arabic-speech-corpus" \ 4 | --num_train_epochs="50" \ 5 | --per_device_train_batch_size="1" \ 6 | --per_device_eval_batch_size="1" \ 7 | --gradient_accumulation_steps="8" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="elgeish/wav2vec2-large-xlsr-53-arabic" \ 15 | --fp16 \ 16 | --dataset_name="arabic_speech_corpus" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --max_duration_in_seconds="15" \ 20 | --orthography="buckwalter" \ 21 | --preprocessing_num_workers="$(nproc)" \ 22 | --group_by_length \ 23 | --freeze_feature_extractor \ 24 | --target_feature_extractor_sampling_rate \ 25 | --verbose_logging \ 26 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_common_voice.py \ 3 | --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \ 4 | --dataset_config_name="tr" \ 5 | --output_dir=./wav2vec2-large-xlsr-turkish-demo \ 6 | --overwrite_output_dir \ 7 | --num_train_epochs="5" \ 8 | --per_device_train_batch_size="16" \ 9 | --evaluation_strategy="steps" \ 10 | --learning_rate="3e-4" \ 11 | --warmup_steps="500" \ 12 | --fp16 \ 13 | --freeze_feature_extractor \ 14 | --save_steps="400" \ 15 | --eval_steps="400" \ 16 | --save_total_limit="3" \ 17 | --logging_steps="400" \ 18 | --group_by_length \ 19 | --feat_proj_dropout="0.0" \ 20 | --layerdrop="0.1" \ 21 | --gradient_checkpointing \ 22 | --do_train --do_eval 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | torch>=1.5.0 4 | torchaudio 5 | jiwer==2.2.0 6 | lang-trans==0.6.0 7 | librosa==0.8.0 8 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/vocab/buckwalter.json: -------------------------------------------------------------------------------- 1 | { 2 | "<pad>": 0, 3 | "<s>": 1, 4 | "</s>": 2, 5 | "<unk>": 3, 6 | "/": 4, 7 | "'": 5, 8 | "|": 6, 9 | ">": 7, 10 | "&": 8, 11 | "<": 9, 12 | "}": 10, 13 | "A": 11, 14 | "b": 12, 15 | "p": 13, 16 | "t": 14, 17 | "v": 15, 18 | "j": 16, 19 | "H": 17, 20 | "x": 18, 21 | "d": 19, 22 | "*": 20, 23 | "r": 21, 24 | "z": 22, 25 | "s": 23, 26 | "$": 24, 27 | "S": 25, 28 | "D": 26, 29 | "T": 27, 30 | "Z": 28, 31 | "E": 29, 32 | "g": 30, 33 | "_": 31, 34 | "f": 32, 35 | "q": 33, 36 | "k": 34, 37 | "l": 35, 38 | "m": 36, 39 | "n": 37, 40 | "h": 38, 41 | "w": 39, 42 | "Y": 40, 43 | "y": 41, 44 | "F": 42, 45 | "N": 43, 46 | "K": 44, 47 | "a": 45, 48 | "u": 46, 49 | "i": 47, 50 | "~": 48, 51 | "o": 49, 52 | "`": 50, 53 | "{": 51, 54 | "P": 52, 55 | "J": 53, 56 | "V": 54, 57 | "G": 55 58 | } -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # 🤗 Benchmark results 18 | 19 | Here, you can find a list of the different benchmark results created by the community. 20 | 21 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add it below. 22 | 23 | | Benchmark description | Results | Environment info | Author | 24 | |:----------|:-------------|:-------------|------:| 25 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 26 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 27 | -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple Choice 18 | 19 | ## Fine-tuning on SWAG 20 | 21 | ```bash 22 | export SWAG_DIR=/path/to/swag_data_dir 23 | python ./examples/multiple-choice/run_tf_multiple_choice.py \ 24 | --task_name swag \ 25 | --model_name_or_path bert-base-cased \ 26 | --do_train \ 27 | --do_eval \ 28 | --data_dir $SWAG_DIR \ 29 | --learning_rate 5e-5 \ 30 | --num_train_epochs 3 \ 31 | --max_seq_length 80 \ 32 | --output_dir models_bert/swag_base \ 33 | --per_gpu_eval_batch_size=16 \ 34 | --per_device_train_batch_size=16 \ 35 | --logging_dir logs \ 36 | --gradient_accumulation_steps 2 \ 37 | --overwrite_output 38 | ``` 39 | -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | tensorflow >= 2.3 4 | -------------------------------------------------------------------------------- /examples/tensorflow/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 |
-------------------------------------------------------------------------------- /examples/tensorflow/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /exps/romanian_postprocess.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | ROOT=mosesdecoder 4 | ro_post_process () { 5 | sys=$1 6 | ref=$2 7 | export MOSES_PATH=$ROOT 8 | REPLACE_UNICODE_PUNCT=$MOSES_PATH/scripts/tokenizer/replace-unicode-punctuation.perl 9 | NORM_PUNC=$MOSES_PATH/scripts/tokenizer/normalize-punctuation.perl 10 | REM_NON_PRINT_CHAR=$MOSES_PATH/scripts/tokenizer/remove-non-printing-char.perl 11 | REMOVE_DIACRITICS=$MOSES_PATH/wmt16-scripts/preprocess/remove-diacritics.py 12 | NORMALIZE_ROMANIAN=$MOSES_PATH/wmt16-scripts/preprocess/normalise-romanian.py 13 | TOKENIZER=$MOSES_PATH/scripts/tokenizer/tokenizer.perl 14 | 15 | lang=ro 16 | for file in $sys $ref; do 17 | cat $file \ 18 | | $REPLACE_UNICODE_PUNCT \ 19 | | $NORM_PUNC -l $lang \ 20 | | $REM_NON_PRINT_CHAR \ 21 | | $NORMALIZE_ROMANIAN \ 22 | | $REMOVE_DIACRITICS \ 23 | | $TOKENIZER -no-escape -l $lang \ 24 | > $(basename $file).tok 25 | done 26 | # compute BLEU 27 | cat $(basename $sys).tok | sacrebleu -tok none -s none -b $(basename $ref).tok 28 | } 29 | 30 | 31 | ro_post_process ${1} ${2} -------------------------------------------------------------------------------- /img/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/img/intro.png -------------------------------------------------------------------------------- /model_cards/README.md: -------------------------------------------------------------------------------- 1 | ## 🔥 Model cards now live inside each huggingface.co model repo 🔥 2 | 3 | 4 | For consistency, ease of use and scalability, `README.md` model cards now live directly inside each model repo on the HuggingFace model hub. 5 | 6 | ### How to update a model card 7 | 8 | You can directly update a model card inside any model repo you have **write access** to, i.e.: 9 | - a model under your username namespace 10 | - a model under any organization you are a part of. 11 | 12 | You can either: 13 | - update it, commit and push using your usual git workflow (command line, GUI, etc.; see the minimal sketch at the end of this card) 14 | - or edit it directly from the website's UI. 15 | 16 | **What if you want to create or update a model card for a model you don't have write access to?** 17 | 18 | In that case, given that we don't have a Pull Request system yet on huggingface.co (🤯), 19 | you can open an issue here, post the card's content, and tag the model author(s) and/or the Hugging Face team. 20 | 21 | We might implement a more seamless process at some point, so your early feedback is precious! 22 | Please let us know if you have any suggestions. 23 | 24 | ### What happened to the model cards here? 25 | 26 | We migrated every model card from the repo to its corresponding huggingface.co model repo. Individual commits were preserved, and they link back to the original commit on GitHub. 
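For the git route, here is a minimal command-line sketch. The namespace and repo name are placeholders; substitute a model repo you actually have write access to:

```bash
# Model repos on huggingface.co are regular git repos, so the usual workflow applies
# (placeholder names below).
git clone https://huggingface.co/your-username/your-model
cd your-model

# Edit the card, then commit and push.
$EDITOR README.md
git add README.md
git commit -m "Update model card"
git push
```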
27 | -------------------------------------------------------------------------------- /petl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/petl/__init__.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py35'] 4 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /scripts/pegasus/build_test_sample_spm_no_bos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus 17 | 18 | # 1. pip install sentencepiece 19 | # 20 | # 2. wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt 21 | 22 | # 3. build 23 | import sentencepiece as spm 24 | 25 | # pegasus: 26 | # 1. no bos 27 | # 2. eos_id is 1 28 | # 3. unk_id is 2 29 | # build a sample spm file accordingly 30 | spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=test_sentencepiece_no_bos --bos_id=-1 --unk_id=2 --eos_id=1 --vocab_size=1000') 31 | 32 | # 4. 
now update the fixture 33 | # mv test_sentencepiece_no_bos.model ../../tests/fixtures/ 34 | -------------------------------------------------------------------------------- /scripts/tatoeba/upload_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for FILE in converted/*; do 4 | model_name=`basename $FILE` 5 | transformers-cli repo create $model_name -y 6 | git clone https://huggingface.co/Helsinki-NLP/$model_name 7 | mv $FILE/* $model_name/ 8 | cd $model_name 9 | git add . && git commit -m "initial commit" 10 | git push 11 | cd .. 12 | done 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchaudio 39 | torchtext 40 | torchvision 41 | torch_xla 42 | tqdm 43 | 44 | line_length = 119 45 | lines_after_imports = 2 46 | multi_line_output = 3 47 | use_parentheses = True 48 | 49 | [flake8] 50 | ignore = E203, E501, E741, W503, W605 51 | max-line-length = 119 52 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/src/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /src/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' 
imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .metrics import glue_compute_metrics, xnli_compute_metrics 20 | from .processors import ( 21 | DataProcessor, 22 | InputExample, 23 | InputFeatures, 24 | SingleSentenceClassificationProcessor, 25 | SquadExample, 26 | SquadFeatures, 27 | SquadV1Processor, 28 | SquadV2Processor, 29 | glue_convert_examples_to_features, 30 | glue_output_modes, 31 | glue_processors, 32 | glue_tasks_num_labels, 33 | squad_convert_examples_to_features, 34 | xnli_output_modes, 35 | xnli_processors, 36 | xnli_tasks_num_labels, 37 | ) 38 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import GlueDataset, GlueDataTrainingArguments 20 | from .language_modeling import ( 21 | LineByLineTextDataset, 22 | LineByLineWithRefDataset, 23 | LineByLineWithSOPTextDataset, 24 | TextDataset, 25 | TextDatasetForNextSentencePrediction, 26 | ) 27 | from .squad import SquadDataset, SquadDataTrainingArguments 28 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 20 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 21 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 22 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 23 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_bertweet": ["BertweetTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_bertweet import BertweetTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/byt5/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_byt5": ["ByT5Tokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_byt5 import ByT5Tokenizer 31 | else: 32 | import importlib 33 | import os 34 | import sys 35 | 36 | class _LazyModule(_BaseLazyModule): 37 | """ 38 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 39 | """ 40 | 41 | __file__ = globals()["__file__"] 42 | __path__ = [os.path.dirname(__file__)] 43 | 44 | def _get_module(self, module_name: str): 45 | return importlib.import_module("." + module_name, self.__name__) 46 | 47 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 48 | -------------------------------------------------------------------------------- /src/transformers/models/camembert/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | from ...utils import logging 19 | from ..roberta.configuration_roberta import RobertaConfig 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 25 | "camembert-base": "https://huggingface.co/camembert-base/resolve/main/config.json", 26 | "umberto-commoncrawl-cased-v1": "https://huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/resolve/main/config.json", 27 | "umberto-wikipedia-uncased-v1": "https://huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/resolve/main/config.json", 28 | } 29 | 30 | 31 | class CamembertConfig(RobertaConfig): 32 | """ 33 | This class overrides :class:`~transformers.RobertaConfig`. Please check the superclass for the appropriate 34 | documentation alongside usage examples. 35 | """ 36 | 37 | model_type = "camembert" 38 | -------------------------------------------------------------------------------- /src/transformers/models/cpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_cpm": ["CpmTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_cpm import CpmTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." 
+ module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/src/transformers/models/dialogpt/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import os 17 | 18 | import torch 19 | 20 | from transformers.file_utils import WEIGHTS_NAME 21 | 22 | 23 | DIALOGPT_MODELS = ["small", "medium", "large"] 24 | 25 | OLD_KEY = "lm_head.decoder.weight" 26 | NEW_KEY = "lm_head.weight" 27 | 28 | 29 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str): 30 | d = torch.load(checkpoint_path) 31 | d[NEW_KEY] = d.pop(OLD_KEY) 32 | os.makedirs(pytorch_dump_folder_path, exist_ok=True) 33 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)) 34 | 35 | 36 | if __name__ == "__main__": 37 | parser = argparse.ArgumentParser() 38 | parser.add_argument("--dialogpt_path", default=".", type=str) 39 | args = parser.parse_args() 40 | for MODEL in DIALOGPT_MODELS: 41 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") 42 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" 43 | convert_dialogpt_checkpoint( 44 | checkpoint_path, 45 | pytorch_dump_folder_path, 46 | ) 47 | -------------------------------------------------------------------------------- /src/transformers/models/mmbt/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """ MMBT configuration """ 17 | 18 | from ...utils import logging 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | 24 | class MMBTConfig(object): 25 | """ 26 | This is the configuration class to store the configuration of a :class:`~transformers.MMBTModel`. It is used to 27 | instantiate an MMBT model according to the specified arguments, defining the model architecture. 28 | 29 | Args: 30 | config (:class:`~transformers.PretrainedConfig`): 31 | Config of the underlying Transformer models. Its values are copied over to use a single config. 32 | num_labels (:obj:`int`, `optional`): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (:obj:`int`, `optional`, defaults to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_phobert": ["PhobertTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_phobert import PhobertTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/xlm_prophetnet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_torch_available 20 | from .configuration_xlm_prophetnet import XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMProphetNetConfig 21 | 22 | 23 | if is_sentencepiece_available(): 24 | from .tokenization_xlm_prophetnet import XLMProphetNetTokenizer 25 | 26 | if is_torch_available(): 27 | from .modeling_xlm_prophetnet import ( 28 | XLM_PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST, 29 | XLMProphetNetDecoder, 30 | XLMProphetNetEncoder, 31 | XLMProphetNetForCausalLM, 32 | XLMProphetNetForConditionalGeneration, 33 | XLMProphetNetModel, 34 | ) 35 | -------------------------------------------------------------------------------- /src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Microsoft Authors and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ XLM-ProphetNet model configuration """ 16 | 17 | 18 | from ...utils import logging 19 | from ..prophetnet.configuration_prophetnet import ProphetNetConfig 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { 25 | "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json", 26 | } 27 | 28 | 29 | class XLMProphetNetConfig(ProphetNetConfig): 30 | """ 31 | This class overrides :class:`~transformers.ProphetNetConfig`. Please check the superclass for the appropriate 32 | documentation alongside usage examples. 33 | """ 34 | 35 | model_type = "xlm-prophetnet" 36 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .trainer_sm import SageMakerTrainer 20 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 21 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import warnings 15 | 16 | from ..trainer import Trainer 17 | from ..utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class SageMakerTrainer(Trainer): 24 | def __init__(self, args=None, **kwargs): 25 | warnings.warn( 26 | "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. You can use `Trainer` " 27 | "instead.", 28 | FutureWarning, 29 | ) 30 | super().__init__(args=args, **kwargs) 31 | -------------------------------------------------------------------------------- /src/transformers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from packaging import version 18 | 19 | from .. 
import __version__ 20 | 21 | 22 | def check_min_version(min_version): 23 | if version.parse(__version__) < version.parse(min_version): 24 | if "dev" in min_version: 25 | error_message = ( 26 | "This example requires a source install from HuggingFace Transformers (see " 27 | "`https://huggingface.co/transformers/installation.html#installing-from-source`)," 28 | ) 29 | else: 30 | error_message = f"This example requires a minimum version of {min_version}," 31 | error_message += f" but the version found is {__version__}.\n" 32 | raise ImportError( 33 | error_message 34 | + ( 35 | "Check out https://huggingface.co/transformers/examples.html for the examples corresponding to other " 36 | "versions of HuggingFace Transformers." 37 | ) 38 | ) 39 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | class Speech2TextProcessor: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["sentencepiece", "speech"]) 8 | 9 | @classmethod 10 | def from_pretrained(cls, *args, **kwargs): 11 | requires_backends(cls, ["sentencepiece", "speech"]) 12 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | SLOW_TO_FAST_CONVERTERS = None 6 | 7 | 8 | def convert_slow_tokenizer(*args, **kwargs): 9 | requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"]) 10 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | class Speech2TextFeatureExtractor: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["speech"]) 8 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_timm_and_vision_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
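# These dummy classes mirror the public DETR API so that `import transformers`
# still works when the optional `timm` and `vision` backends are not installed;
# instantiating any of them (or calling `from_pretrained`) raises an informative
# error via `requires_backends` instead of a bare ImportError.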
2 | from ..file_utils import requires_backends 3 | 4 | 5 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class DetrForObjectDetection: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["timm", "vision"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["timm", "vision"]) 15 | 16 | 17 | class DetrForSegmentation: 18 | def __init__(self, *args, **kwargs): 19 | requires_backends(self, ["timm", "vision"]) 20 | 21 | @classmethod 22 | def from_pretrained(cls, *args, **kwargs): 23 | requires_backends(cls, ["timm", "vision"]) 24 | 25 | 26 | class DetrModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_backends(self, ["timm", "vision"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["timm", "vision"]) 33 | 34 | 35 | class DetrPreTrainedModel: 36 | def __init__(self, *args, **kwargs): 37 | requires_backends(self, ["timm", "vision"]) 38 | 39 | @classmethod 40 | def from_pretrained(cls, *args, **kwargs): 41 | requires_backends(cls, ["timm", "vision"]) 42 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_timm_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class DetrForObjectDetection: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["timm"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["timm"]) 15 | 16 | 17 | class DetrForSegmentation: 18 | def __init__(self, *args, **kwargs): 19 | requires_backends(self, ["timm"]) 20 | 21 | @classmethod 22 | def from_pretrained(cls, *args, **kwargs): 23 | requires_backends(cls, ["timm"]) 24 | 25 | 26 | class DetrModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_backends(self, ["timm"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["timm"]) 33 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_vision_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..file_utils import requires_backends 3 | 4 | 5 | class ImageFeatureExtractionMixin: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["vision"]) 8 | 9 | 10 | class CLIPFeatureExtractor: 11 | def __init__(self, *args, **kwargs): 12 | requires_backends(self, ["vision"]) 13 | 14 | 15 | class CLIPProcessor: 16 | def __init__(self, *args, **kwargs): 17 | requires_backends(self, ["vision"]) 18 | 19 | @classmethod 20 | def from_pretrained(cls, *args, **kwargs): 21 | requires_backends(cls, ["vision"]) 22 | 23 | 24 | class DeiTFeatureExtractor: 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["vision"]) 27 | 28 | 29 | class DetrFeatureExtractor: 30 | def __init__(self, *args, **kwargs): 31 | requires_backends(self, ["vision"]) 32 | 33 | 34 | class ViTFeatureExtractor: 35 | def __init__(self, *args, **kwargs): 36 | requires_backends(self, ["vision"]) 37 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # How to add a new example script in 🤗 Transformers 18 | 19 | This folder provides a template for adding a new example script implementing a training or inference task with the 20 | models in the 🤗 Transformers library. To use it, you will need to install cookiecutter: 21 | ``` 22 | pip install cookiecutter 23 | ``` 24 | or refer to the installation page of the [cookiecutter documentation](https://cookiecutter.readthedocs.io/). 25 | 26 | You can then run the following command inside the `examples` folder of the transformers repo: 27 | ``` 28 | cookiecutter ../templates/adding_a_new_example_script/ 29 | ``` 30 | and answer the questions asked (a sample session is sketched at the end of this README), which will generate a new 31 | folder where you will find a pre-filled template for your example following the best practices we recommend for them. 32 | 33 | Adjust the way the data is preprocessed, the model is loaded or the Trainer is instantiated. Then, when you're happy, 34 | add a `README.md` in the folder (or complete the existing one if you added a script to an existing folder) telling a 35 | user how to run your script. 36 | 37 | Make a PR to the 🤗 Transformers repo. Don't forget to tweet about your new example with a carbon screenshot of how to 38 | run it and tag @huggingface! 
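As a rough illustration, a run might look like the hypothetical session below. The questions come from this template's `cookiecutter.json` (reproduced right after this README), the answers shown are made-up placeholders, and the exact prompt rendering depends on your cookiecutter version:

```
cookiecutter ../templates/adding_a_new_example_script/
example_name [text classification]: summarization
directory_name [summarization]:
example_shortcut [summarization]:
model_class [AutoModel]: AutoModelForSeq2SeqLM
authors [The HuggingFace Team]: Jane Doe
Select can_train_from_scratch:
1 - True
2 - False
Choose from 1, 2 [1]: 2
Select with_trainer:
1 - True
2 - False
Choose from 1, 2 [1]: 1
```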
39 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"], 8 | "with_trainer": ["True", "False"] 9 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}", 10 | "is_encoder_decoder_model": ["True", "False"] 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"], 9 | "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"], 10 | "is_encoder_decoder_model": ["True", "False"] 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/open_model_proposals/README.md: -------------------------------------------------------------------------------- 1 | Currently the following model proposals are available: 2 | 3 | - [BigBird (Google)](./ADD_BIG_BIRD.md) 4 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": 
"brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewENCDEC", 3 | "uppercase_modelname": "NEW_ENC_DEC", 4 | "lowercase_modelname": "new_enc_dec", 5 | "camelcase_modelname": "NewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_and_pytorch": "PyTorch", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | "lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewTFENCDEC", 3 | "uppercase_modelname": "NEW_TF_ENC_DEC", 4 | "lowercase_modelname": "new_tf_enc_dec", 5 | "camelcase_modelname": "NewTFEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-tf-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_and_pytorch": "TensorFlow", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/__init__.py -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 
| 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 2, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "allgather_partitions": true, 37 | "allgather_bucket_size": 2e8, 38 | "overlap_comm": true, 39 | "reduce_scatter": true, 40 | "reduce_bucket_size": 2e8, 41 | "contiguous_gradients": true 42 | }, 43 | 44 | "gradient_accumulation_steps": "auto", 45 | "gradient_clipping": "auto", 46 | "steps_per_print": 2000, 47 | "train_batch_size": "auto", 48 | "train_micro_batch_size_per_gpu": "auto", 49 | "wall_clock_breakdown": false 50 | } 51 | -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_fp16_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } 58 | -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/dummy_feature_extractor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/empty.txt -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /tests/fixtures/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece_bpe.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | cache* 2 | temp* 3 | !*.txt 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/tests_samples/COCO/000000039769.png -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt: -------------------------------------------------------------------------------- 1 | [{"id": 8222595, "category_id": 17, "iscrowd": 0, "bbox": [18, 54, 301, 415], "area": 53306}, {"id": 8225432, "category_id": 17, "iscrowd": 0, "bbox": [349, 26, 291, 343], "area": 59627}, {"id": 8798150, "category_id": 63, "iscrowd": 0, "bbox": [1, 0, 639, 474], "area": 174579}, {"id": 14466198, "category_id": 75, "iscrowd": 0, "bbox": [42, 74, 133, 45], "area": 4068}, {"id": 12821912, "category_id": 75, "iscrowd": 0, "bbox": [333, 80, 38, 106], "area": 2118}, {"id": 10898909, "category_id": 93, "iscrowd": 0, "bbox": [0, 
0, 640, 480], "area": 2750}] -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 
7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 
8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 
2.200 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/wmt16/sample.json: -------------------------------------------------------------------------------- 1 | {"translation": {"en": "Membership of Parliament: see Minutes", "ro": "Componenţa Parlamentului: a se vedea procesul-verbal"}} 2 | {"translation": {"en": "Approval of Minutes of previous sitting: see Minutes", "ro": "Aprobarea procesului-verbal al şedinţei precedente: a se vedea procesul-verbal"}} 3 | {"translation": {"en": "Membership of Parliament: see Minutes", "ro": "Componenţa Parlamentului: a se vedea procesul-verbal"}} 4 | {"translation": {"en": "Verification of credentials: see Minutes", "ro": "Verificarea prerogativelor: a se vedea procesul-verbal"}} 5 | {"translation": {"en": "Documents received: see Minutes", "ro": "Depunere de documente: a se vedea procesul-verbal"}} 6 | {"translation": {"en": "Written statements and oral questions (tabling): see Minutes", "ro": "Declaraţii scrise şi întrebări orale (depunere): consultaţi procesul-verbal"}} 7 | {"translation": {"en": "Petitions: see Minutes", "ro": "Petiţii: a se vedea procesul-verbal"}} 8 | {"translation": {"en": "Texts of agreements forwarded by the Council: see Minutes", "ro": "Transmiterea de către Consiliu a textelor acordurilor: a se vedea procesul-verbal"}} 9 | {"translation": {"en": "Action taken on Parliament's resolutions: see Minutes", "ro": "Cursul dat rezoluţiilor Parlamentului: a se vedea procesul-verbal"}} 10 | {"translation": {"en": "Agenda for next sitting: see Minutes", "ro": "Ordinea de zi a următoarei şedinţe: a se vedea procesul-verbal"}} 11 | -------------------------------------------------------------------------------- /tests/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def is_sagemaker_available(): 5 | return importlib.util.find_spec("sagemaker") is not None 6 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@master # install from master, or replace master with vX.X.X to install a specific transformers version -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/run_ddp.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import subprocess 5 | from argparse import ArgumentParser 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def parse_args(): 12 | parser = ArgumentParser() 13 | parsed, unknown = parser.parse_known_args() 14 | for arg in unknown: 15 | if arg.startswith(("-", "--")): 16 | parser.add_argument(arg.split("=")[0]) 17 | 18 | return parser.parse_args() 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | port = 8888 24 | num_gpus = int(os.environ["SM_NUM_GPUS"]) 25 | hosts = json.loads(os.environ["SM_HOSTS"]) 26 | num_nodes = len(hosts) 27 | current_host = os.environ["SM_CURRENT_HOST"] 28 | rank = hosts.index(current_host) 29 | os.environ["NCCL_DEBUG"] = "INFO" 30 | 31 | if num_nodes > 1: 32 | cmd = f"""python -m torch.distributed.launch \ 33 | --nnodes={num_nodes} \ 34 | --node_rank={rank} \ 35 | --nproc_per_node={num_gpus} \ 36 | --master_addr={hosts[0]} \ 37 | --master_port={port} \ 38 | ./run_glue.py \ 39 | 
{"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" 40 | else: 41 | cmd = f"""python -m torch.distributed.launch \ 42 | --nproc_per_node={num_gpus} \ 43 | ./run_glue.py \ 44 | {"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" 45 | try: 46 | subprocess.run(cmd, shell=True) 47 | except Exception as e: 48 | logger.info(e) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/tensorflow/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@master # install master or adjust ist with vX.X.X for installing version specific transforms -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from transformers import is_torch_available 18 | from transformers.testing_utils import require_torch 19 | 20 | 21 | if is_torch_available(): 22 | import torch 23 | 24 | from transformers.activations import _gelu_python, gelu_new, get_activation 25 | 26 | 27 | @require_torch 28 | class TestActivations(unittest.TestCase): 29 | def test_gelu_versions(self): 30 | x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) 31 | torch_builtin = get_activation("gelu") 32 | self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item()) 33 | self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item()) 34 | 35 | def test_get_activation(self): 36 | get_activation("swish") 37 | get_activation("silu") 38 | get_activation("relu") 39 | get_activation("tanh") 40 | get_activation("gelu_new") 41 | get_activation("gelu_fast") 42 | with self.assertRaises(KeyError): 43 | get_activation("bogus") 44 | with self.assertRaises(KeyError): 45 | get_activation(None) 46 | -------------------------------------------------------------------------------- /tests/test_activations_tf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | from transformers import is_tf_available 18 | from transformers.testing_utils import require_tf 19 | 20 | 21 | if is_tf_available(): 22 | from transformers.activations_tf import get_tf_activation 23 | 24 | 25 | @require_tf 26 | class TestTFActivations(unittest.TestCase): 27 | def test_get_activation(self): 28 | get_tf_activation("swish") 29 | get_tf_activation("silu") 30 | get_tf_activation("gelu") 31 | get_tf_activation("relu") 32 | get_tf_activation("tanh") 33 | get_tf_activation("gelu_new") 34 | get_tf_activation("gelu_fast") 35 | get_tf_activation("mish") 36 | with self.assertRaises(KeyError): 37 | get_tf_activation("bogus") 38 | with self.assertRaises(KeyError): 39 | get_tf_activation(None) 40 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | from unittest.mock import patch 18 | 19 | from transformers.testing_utils import CaptureStd 20 | 21 | 22 | class CLITest(unittest.TestCase): 23 | @patch("sys.argv", ["fakeprogrampath", "env"]) 24 | def test_cli_env(self): 25 | # test transformers-cli env 26 | import transformers.commands.transformers_cli 27 | 28 | with CaptureStd() as cs: 29 | transformers.commands.transformers_cli.main() 30 | assert "Python version" in cs.out 31 | assert "Platform" in cs.out 32 | assert "Using distributed or parallel set-up in script?" in cs.out 33 | -------------------------------------------------------------------------------- /tests/test_feature_extraction_auto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
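# The three tests below cover AutoFeatureExtractor's lookup paths: a hub model id, a local directory containing the config, and a direct path to a config file.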
15 | 16 | import os 17 | import unittest 18 | 19 | from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor 20 | 21 | 22 | SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures") 23 | SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join( 24 | os.path.dirname(os.path.abspath(__file__)), "fixtures/dummy_feature_extractor_config.json" 25 | ) 26 | 27 | 28 | class AutoFeatureExtractorTest(unittest.TestCase): 29 | def test_feature_extractor_from_model_shortcut(self): 30 | config = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h") 31 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 32 | 33 | def test_feature_extractor_from_local_directory(self): 34 | config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) 35 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 36 | 37 | def test_feature_extractor_from_local_file(self): 38 | config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG) 39 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 40 | -------------------------------------------------------------------------------- /tests/test_pipelines_feature_extraction.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class FeatureExtractionPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "feature-extraction" 22 | small_models = [ 23 | "sshleifer/tiny-distilbert-base-cased" 24 | ] # Default model - Models tested without the @slow decorator 25 | large_models = [None] # Models tested with the @slow decorator 26 | mandatory_keys = {} # Keys which should be in the output 27 | -------------------------------------------------------------------------------- /tests/test_pipelines_text2text_generation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
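# MonoInputPipelineCommonMixin supplies the shared single-input pipeline tests; this class only declares the task name, the checkpoints to exercise, and the expected output keys.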
14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class Text2TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "text2text-generation" 22 | small_models = ["patrickvonplaten/t5-tiny-random"] # Default model - Models tested without the @slow decorator 23 | large_models = [] # Models tested with the @slow decorator 24 | invalid_inputs = [4, ""] 25 | mandatory_keys = ["generated_text"] 26 | -------------------------------------------------------------------------------- /tests/test_pipelines_text_classification.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class TextClassificationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "sentiment-analysis" 22 | small_models = [ 23 | "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english" 24 | ] # Default model - Models tested without the @slow decorator 25 | large_models = [None] # Models tested with the @slow decorator 26 | mandatory_keys = {"label", "score"} # Keys which should be in the output 27 | -------------------------------------------------------------------------------- /tests/test_tokenization_blenderbot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | # Copyright 2020 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Tests for Blenderbot Tokenizers, including common tests for BlenderbotSmallTokenizer.""" 17 | import unittest 18 | 19 | from transformers.file_utils import cached_property 20 | from transformers.models.blenderbot.tokenization_blenderbot import BlenderbotTokenizer 21 | 22 | 23 | class Blenderbot3BTokenizerTests(unittest.TestCase): 24 | @cached_property 25 | def tokenizer_3b(self): 26 | return BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B") 27 | 28 | def test_encode_decode_cycle(self): 29 | tok = self.tokenizer_3b 30 | src_text = " I am a small frog." 
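# Round-trip check: encoding and then decoding (with special tokens stripped and no space cleanup) should reproduce src_text exactly, leading space included.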
31 | encoded = tok([src_text], padding=False, truncation=False)["input_ids"] 32 | decoded = tok.batch_decode(encoded, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] 33 | assert src_text == decoded 34 | 35 | def test_3B_tokenization_same_as_parlai(self): 36 | assert self.tokenizer_3b.add_prefix_space 37 | assert self.tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]] 38 | -------------------------------------------------------------------------------- /tests/test_tokenization_cpm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc. team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from transformers.models.cpm.tokenization_cpm import CpmTokenizer 17 | from transformers.testing_utils import custom_tokenizers 18 | 19 | from .test_modeling_xlnet import XLNetModelTest 20 | 21 | 22 | @custom_tokenizers 23 | class CpmTokenizationTest(XLNetModelTest): 24 | def test_pre_tokenization(self): 25 | tokenizer = CpmTokenizer.from_pretrained("TsinghuaAI/CPM-Generate") 26 | text = "Hugging Face大法好,谁用谁知道。" 27 | normalized_text = "Hugging Face大法好,谁用谁知道。" 28 | bpe_tokens = "▁Hu gg ing ▁ ▂ ▁F ace ▁大法 ▁好 ▁ , ▁谁 ▁用 ▁谁 ▁知 道 ▁ 。".split() 29 | 30 | tokens = tokenizer.tokenize(text) 31 | self.assertListEqual(tokens, bpe_tokens) 32 | 33 | input_tokens = tokens + [tokenizer.unk_token] 34 | 35 | input_bpe_tokens = [13789, 13283, 1421, 8, 10, 1164, 13608, 16528, 63, 8, 9, 440, 108, 440, 121, 90, 8, 12, 0] 36 | self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) 37 | 38 | reconstructed_text = tokenizer.decode(input_bpe_tokens) 39 | self.assertEqual(reconstructed_text, normalized_text) 40 | -------------------------------------------------------------------------------- /tests/test_tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
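# DistilBERT shares BERT's tokenizer, so this suite inherits the full BertTokenizationTest battery and only adds a DistilBERT-specific sequence-builder check.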
15 | 16 | 17 | from transformers import DistilBertTokenizer, DistilBertTokenizerFast 18 | from transformers.testing_utils import require_tokenizers, slow 19 | 20 | from .test_tokenization_bert import BertTokenizationTest 21 | 22 | 23 | @require_tokenizers 24 | class DistilBertTokenizationTest(BertTokenizationTest): 25 | 26 | tokenizer_class = DistilBertTokenizer 27 | rust_tokenizer_class = DistilBertTokenizerFast 28 | test_rust_tokenizer = True 29 | 30 | @slow 31 | def test_sequence_builders(self): 32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 33 | 34 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 36 | 37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 39 | 40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 42 | tokenizer.sep_token_id 43 | ] 44 | -------------------------------------------------------------------------------- /tride/controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def override(hidden_states, override): 5 | """Change the hidden states according to 6 | the arguments specified in the override dict. 7 | 8 | Args: 9 | 10 | hidden_states (tensor): (B, T, H) 11 | override (dict): expects 'alpha' (float) and 'model' (a fitted sklearn linear classifier) 12 | """ 13 | 14 | alpha = override['alpha'] 15 | sklearn_model = override['model'] 16 | index = sklearn_model.coef_[0].nonzero()[0] # only the hidden dims the probe actually uses 17 | w = sklearn_model.coef_[0][index] 18 | b = sklearn_model.intercept_[0] 19 | 20 | hidden_states_cpu = hidden_states.cpu().numpy() 21 | 22 | x = hidden_states_cpu[:,:,index] 23 | 24 | w_expand = np.expand_dims(w, axis=(0,1)) 25 | 26 | project_x = x - np.expand_dims((np.dot(x, w) + b) / np.sqrt(np.dot(w, w)), axis=-1) * w_expand # the (B, T) distances need a trailing axis to broadcast against w_expand 27 | 28 | final_x = project_x + alpha * w_expand # shift along the (unnormalized) probe direction 29 | 30 | hidden_states_cpu[:,:,index] = final_x 31 | 32 | return hidden_states.new_tensor(hidden_states_cpu) 33 | -------------------------------------------------------------------------------- /tride/openai_sentiment_neuron/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /tride/openai_sentiment_neuron/sst_binary_demo.py: -------------------------------------------------------------------------------- 1 | from encoder import Model 2 | from matplotlib import pyplot as plt 3 | from utils import sst_binary, train_with_reg_cv 4 | 5 | model = Model() 6 | 7 | trX, vaX, teX, trY, vaY, teY = sst_binary() 8 | trXt = model.transform(trX) 9 | vaXt = model.transform(vaX) 10 | teXt = model.transform(teX) 11 | 12 | # classification results 13 | full_rep_acc, c, nnotzero = train_with_reg_cv(trXt, trY, vaXt, vaY, teXt, teY) 14 | print('%05.2f test accuracy'%full_rep_acc) 15 | print('%05.2f regularization coef'%c) 16 | print('%05d features used'%nnotzero) 17 | 18 | # visualize sentiment unit 19 | sentiment_unit = trXt[:, 2388] 20 | plt.hist(sentiment_unit[trY==0], bins=25, alpha=0.5, label='neg') 21 | plt.hist(sentiment_unit[trY==1], bins=25, alpha=0.5, label='pos') 22 | plt.legend() 23 | plt.show() 24 | -------------------------------------------------------------------------------- /tride/scripts/generate_text.py: 
-------------------------------------------------------------------------------- 1 | """sample text from a pretrained language model 2 | """ 3 | 4 | import argparse 5 | import torch 6 | from transformers import AutoModelForCausalLM, PreTrainedTokenizerFast 7 | 8 | parser = argparse.ArgumentParser() 9 | # parser.add_argument('--prompt', type=str, default='', 10 | # help='the prompt to start with') 11 | parser.add_argument('--model', type=str, default='gpt2-large', 12 | help='the pretrained model name') 13 | 14 | args = parser.parse_args() 15 | 16 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 17 | 18 | tokenizer = PreTrainedTokenizerFast.from_pretrained(args.model) 19 | model = AutoModelForCausalLM.from_pretrained(args.model) 20 | 21 | model.to(device) 22 | model.eval() 23 | 24 | prompt="In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English." 25 | 26 | # encode input context 27 | input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device) 28 | 29 | outputs = model.generate(input_ids=None if prompt=='' else input_ids, do_sample=True, max_length=512, top_k=50) 30 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 31 | -------------------------------------------------------------------------------- /tride/scripts/sklearn_lr_sst.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from openai_sentiment_neuron import sst_binary, train_with_reg 4 | 5 | 6 | def read_input(keys): 7 | def parse_fname(fname): # feature files are named like prefix.size{N}.hid{H}.ext 8 | x = '.'.join(fname.split('.')[:-1]) 9 | x = x.split('/')[-1] 10 | x = x.split('.') 11 | 12 | size = int(x[-2].split('size')[-1]) 13 | embed = int(x[-1].split('hid')[-1]) 14 | 15 | return size, embed 16 | 17 | size, embed = parse_fname(keys) 18 | keys = np.memmap(keys, 19 | dtype=np.float32, 20 | mode='r', 21 | shape=(size, embed)) 22 | 23 | return keys 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--data', type=str, default='openai_sentiment_neuron/data', 27 | help='the data directory which consists of csv files') 28 | parser.add_argument('--train', type=str, 29 | help='path to the memmap file of training-set features (named like prefix.size{N}.hid{H}.ext)') 30 | parser.add_argument('--val', type=str, 31 | help='path to the memmap file of validation-set features (same naming scheme)') 32 | parser.add_argument('--test', type=str, 33 | help='path to the memmap file of test-set features (same naming scheme)') 34 | parser.add_argument('--c', type=float, default=1, 35 | help='inverse of the regularization strength') 36 | 37 | args = parser.parse_args() 38 | 39 | trX, vaX, teX, trY, vaY, teY = sst_binary(args.data) 40 | 41 | trXt = read_input(args.train) 42 | vaXt = read_input(args.val) 43 | teXt = read_input(args.test) 44 | 45 | # classification results 46 | full_rep_acc, c, nnotzero, model = train_with_reg(trXt, trY, vaXt, vaY, teXt, teY, c=args.c, verbose=1) 47 | print('%05.2f test accuracy'%full_rep_acc) 48 | print('%05.2f regularization coef'%c) 49 | print('%05d features used'%nnotzero) 50 | -------------------------------------------------------------------------------- /utils/get_modified_files.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.: 17 | # python ./utils/get_modified_files.py utils src tests examples 18 | # 19 | # it uses git to find the forking point and which files were modified - i.e. files not under git won't be considered 20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results 21 | 22 | import re 23 | import subprocess 24 | import sys 25 | 26 | 27 | fork_point_sha = subprocess.check_output("git merge-base master HEAD".split()).decode("utf-8") 28 | modified_files = subprocess.check_output(f"git diff --name-only {fork_point_sha}".split()).decode("utf-8").split() 29 | 30 | joined_dirs = "|".join(sys.argv[1:]) 31 | regex = re.compile(fr"^({joined_dirs}).*?\.py$") 32 | 33 | relevant_modified_files = [x for x in modified_files if regex.match(x)] 34 | print(" ".join(relevant_modified_files), end="") 35 | --------------------------------------------------------------------------------
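As a closing note, here is a minimal sketch of how the override hook in tride/controller.py could be driven on its own. The probe, the alpha value, and the tensor shapes below are illustrative stand-ins rather than values taken from the repository; any fitted scikit-learn linear classifier exposing coef_ and intercept_ should work.

import numpy as np
import torch
from sklearn.linear_model import LogisticRegression

from tride.controller import override

# Fit a toy linear probe on synthetic "hidden states": (num_examples, hidden_size).
hidden_size = 16
rng = np.random.RandomState(0)
features = rng.randn(64, hidden_size)
labels = np.array([0, 1] * 32)  # deterministic labels so both classes are present
probe = LogisticRegression(max_iter=200).fit(features, labels)

# Hidden states as they would appear inside a transformer layer: (B, T, H).
hidden = torch.randn(2, 5, hidden_size)

# Project every position onto the probe's decision boundary, then shift it
# by alpha along the probe direction (larger alpha means a stronger push).
steered = override(hidden, {"model": probe, "alpha": 2.0})
print(steered.shape)  # torch.Size([2, 5, 16])

Passing the probe and the strength in a plain dict keeps the hook generic: the same forward pass can be steered by a different linear probe simply by swapping the 'model' entry, which is presumably why the repository stores both alongside the hidden states rather than baking either into the model.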