├── .circleci ├── config.yml └── deploy.sh ├── .coveragerc ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── ---new-benchmark.md │ ├── --new-model-addition.md │ ├── bug-report.md │ ├── feature-request.md │ ├── migration.md │ └── question-help.md ├── PULL_REQUEST_TEMPLATE.md ├── conda │ ├── build.sh │ └── meta.yaml └── workflows │ ├── github-torch-hub.yml │ ├── model-templates.yml │ ├── release-conda.yml │ ├── self-push.yml │ ├── self-scheduled.yml │ └── stale.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUES.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README_hf.md ├── analysis ├── paper_plot.py └── plot_weights.py ├── docker ├── transformers-cpu │ └── Dockerfile ├── transformers-gpu │ └── Dockerfile ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-pytorch-tpu │ ├── Dockerfile │ ├── bert-base-cased.jsonnet │ ├── dataset.yaml │ └── docker-entrypoint.sh ├── transformers-tensorflow-cpu │ └── Dockerfile └── transformers-tensorflow-gpu │ └── Dockerfile ├── docs ├── Makefile ├── README.md └── source │ ├── _static │ ├── css │ │ ├── Calibre-Light.ttf │ │ ├── Calibre-Medium.otf │ │ ├── Calibre-Regular.otf │ │ ├── Calibre-Thin.otf │ │ ├── code-snippets.css │ │ └── huggingface.css │ └── js │ │ ├── custom.js │ │ └── huggingface_logo.svg │ ├── add_new_model.rst │ ├── benchmarks.rst │ ├── bertology.rst │ ├── community.md │ ├── conf.py │ ├── contributing.md │ ├── converting_tensorflow_models.rst │ ├── custom_datasets.rst │ ├── debugging.rst │ ├── examples.md │ ├── fast_tokenizers.rst │ ├── favicon.ico │ ├── glossary.rst │ ├── imgs │ ├── course_banner.png │ ├── local_attention_mask.png │ ├── ppl_chunked.gif │ ├── ppl_full.gif │ ├── ppl_sliding.gif │ ├── transformers_logo_name.png │ ├── transformers_overview.png │ ├── warmup_constant_schedule.png │ ├── warmup_cosine_hard_restarts_schedule.png │ ├── warmup_cosine_schedule.png │ ├── warmup_cosine_warm_restarts_schedule.png │ └── warmup_linear_schedule.png │ ├── index.rst │ ├── installation.md │ ├── internal │ ├── file_utils.rst │ ├── generation_utils.rst │ ├── modeling_utils.rst │ ├── pipelines_utils.rst │ ├── tokenization_utils.rst │ └── trainer_utils.rst │ ├── main_classes │ ├── callback.rst │ ├── configuration.rst │ ├── data_collator.rst │ ├── deepspeed.rst │ ├── feature_extractor.rst │ ├── logging.rst │ ├── model.rst │ ├── optimizer_schedules.rst │ ├── output.rst │ ├── pipelines.rst │ ├── processors.rst │ ├── tokenizer.rst │ └── trainer.rst │ ├── migration.md │ ├── model_doc │ ├── albert.rst │ ├── auto.rst │ ├── bart.rst │ ├── barthez.rst │ ├── bert.rst │ ├── bert_japanese.rst │ ├── bertgeneration.rst │ ├── bertweet.rst │ ├── bigbird.rst │ ├── bigbird_pegasus.rst │ ├── blenderbot.rst │ ├── blenderbot_small.rst │ ├── bort.rst │ ├── byt5.rst │ ├── camembert.rst │ ├── clip.rst │ ├── convbert.rst │ ├── cpm.rst │ ├── ctrl.rst │ ├── deberta.rst │ ├── deberta_v2.rst │ ├── deit.rst │ ├── detr.rst │ ├── dialogpt.rst │ ├── distilbert.rst │ ├── dpr.rst │ ├── electra.rst │ ├── encoderdecoder.rst │ ├── flaubert.rst │ ├── fsmt.rst │ ├── funnel.rst │ ├── gpt.rst │ ├── gpt2.rst │ ├── gpt_neo.rst │ ├── herbert.rst │ ├── hubert.rst │ ├── ibert.rst │ ├── layoutlm.rst │ ├── led.rst │ ├── longformer.rst │ ├── luke.rst │ ├── lxmert.rst │ ├── m2m_100.rst │ ├── marian.rst │ ├── mbart.rst │ ├── megatron_bert.rst │ ├── megatron_gpt2.rst │ ├── mobilebert.rst │ ├── mpnet.rst │ ├── mt5.rst │ ├── pegasus.rst │ ├── phobert.rst │ ├── prophetnet.rst │ ├── rag.rst │ ├── reformer.rst 
│ ├── retribert.rst │ ├── roberta.rst │ ├── roformer.rst │ ├── speech_to_text.rst │ ├── squeezebert.rst │ ├── t5.rst │ ├── tapas.rst │ ├── transformerxl.rst │ ├── visual_bert.rst │ ├── vit.rst │ ├── wav2vec2.rst │ ├── xlm.rst │ ├── xlmprophetnet.rst │ ├── xlmroberta.rst │ ├── xlnet.rst │ └── xlsr_wav2vec2.rst │ ├── model_sharing.rst │ ├── model_summary.rst │ ├── multilingual.rst │ ├── notebooks.md │ ├── performance.md │ ├── perplexity.rst │ ├── philosophy.rst │ ├── preprocessing.rst │ ├── pretrained_models.rst │ ├── quicktour.rst │ ├── sagemaker.md │ ├── serialization.rst │ ├── task_summary.rst │ ├── testing.rst │ ├── tokenizer_summary.rst │ ├── training.rst │ └── troubleshooting.md ├── examples ├── README.md ├── flax │ ├── README.md │ ├── language-modeling │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_clm_flax.py │ │ └── run_mlm_flax.py │ ├── summarization │ │ └── run_summarization_flax.py │ └── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_flax_glue.py ├── legacy │ ├── README.md │ ├── multiple_choice │ │ ├── run_multiple_choice.py │ │ └── utils_multiple_choice.py │ ├── pytorch-lightning │ │ ├── lightning_base.py │ │ ├── requirements.txt │ │ ├── run_glue.py │ │ ├── run_glue.sh │ │ ├── run_ner.py │ │ ├── run_ner.sh │ │ └── run_pos.sh │ ├── question-answering │ │ ├── run_squad.py │ │ └── run_squad_trainer.py │ ├── run_camembert.py │ ├── run_chinese_ref.py │ ├── run_language_modeling.py │ ├── run_openai_gpt.py │ ├── run_swag.py │ ├── run_transfo_xl.py │ ├── seq2seq │ │ ├── README.md │ │ ├── __init__.py │ │ ├── convert_model_to_fp16.py │ │ ├── download_wmt.py │ │ ├── finetune.sh │ │ ├── finetune_tpu.sh │ │ ├── finetune_trainer.py │ │ ├── minify_dataset.py │ │ ├── old_test_calculate_rouge.py │ │ ├── old_test_datasets.py │ │ ├── old_test_fsmt_bleu_score.py │ │ ├── old_test_seq2seq_examples.py │ │ ├── old_test_seq2seq_examples_multi_gpu.py │ │ ├── old_test_tatoeba_conversion.py │ │ ├── pack_dataset.py │ │ ├── requirements.txt │ │ ├── romanian_postprocessing.md │ │ ├── rouge_cli.py │ │ ├── run_distributed_eval.py │ │ ├── run_eval.py │ │ ├── run_eval_search.py │ │ ├── save_len_file.py │ │ ├── save_randomly_initialized_model.py │ │ ├── sentence_splitter.py │ │ ├── seq2seq_trainer.py │ │ ├── seq2seq_training_args.py │ │ ├── test_data │ │ │ ├── fsmt │ │ │ │ ├── build-eval-data.py │ │ │ │ └── fsmt_val_data.json │ │ │ ├── test_data │ │ │ └── wmt_en_ro │ │ │ │ ├── test.source │ │ │ │ ├── test.target │ │ │ │ ├── train.len │ │ │ │ ├── train.source │ │ │ │ ├── train.target │ │ │ │ ├── val.len │ │ │ │ ├── val.source │ │ │ │ └── val.target │ │ ├── train_distil_marian_enro.sh │ │ ├── train_distil_marian_enro_tpu.sh │ │ ├── train_distilbart_cnn.sh │ │ ├── train_mbart_cc25_enro.sh │ │ ├── utils.py │ │ └── xla_spawn.py │ ├── text-classification │ │ └── run_tf_text_classification.py │ └── token-classification │ │ ├── README.md │ │ ├── run.sh │ │ ├── run_chunk.sh │ │ ├── run_ner.py │ │ ├── run_pos.sh │ │ ├── run_tf_ner.py │ │ ├── scripts │ │ └── preprocess.py │ │ ├── tasks.py │ │ └── utils_ner.py ├── pytorch │ ├── README.md │ ├── _tests_requirements.txt │ ├── benchmarking │ │ ├── README.md │ │ ├── plot_csv_file.py │ │ ├── requirements.txt │ │ └── run_benchmark.py │ ├── conftest.py │ ├── language-modeling │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_clm.py │ │ ├── run_clm_no_trainer.py │ │ ├── run_mlm.py │ │ ├── run_mlm_no_trainer.py │ │ └── run_plm.py │ ├── multiple-choice │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_no_trainer.sh │ │ ├── run_swag.py │ │ 
└── run_swag_no_trainer.py │ ├── question-answering │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_qa.py │ │ ├── run_qa_beam_search.py │ │ ├── run_qa_beam_search_no_trainer.py │ │ ├── run_qa_no_trainer.py │ │ ├── trainer_qa.py │ │ └── utils_qa.py │ ├── summarization │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── run_summarization_dataset.py │ │ ├── run_summarization_no_trainer.py │ │ └── run_summarization_test.py │ ├── test_examples.py │ ├── test_xla_examples.py │ ├── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_glue.py │ │ ├── run_glue_no_trainer.py │ │ └── run_xnli.py │ ├── text-generation │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_generation.py │ ├── token-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── run_ner.py │ │ ├── run_ner_no_trainer.py │ │ └── run_no_trainer.sh │ ├── translation │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_translation.py │ │ └── run_translation_no_trainer.py │ └── xla_spawn.py ├── research_projects │ ├── README.md │ ├── adversarial │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_hans.py │ │ └── utils_hans.py │ ├── bert-loses-patience │ │ ├── README.md │ │ ├── pabee │ │ │ ├── __init__.py │ │ │ ├── modeling_pabee_albert.py │ │ │ └── modeling_pabee_bert.py │ │ ├── requirements.txt │ │ ├── run_glue_with_pabee.py │ │ └── test_run_glue_with_pabee.py │ ├── bertabs │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configuration_bertabs.py │ │ ├── convert_bertabs_original_pytorch_checkpoint.py │ │ ├── modeling_bertabs.py │ │ ├── requirements.txt │ │ ├── run_summarization.py │ │ ├── test_utils_summarization.py │ │ └── utils_summarization.py │ ├── bertology │ │ ├── requirements.txt │ │ ├── run_bertology.py │ │ └── run_prune_gpt.py │ ├── deebert │ │ ├── README.md │ │ ├── entropy_eval.sh │ │ ├── eval_deebert.sh │ │ ├── requirements.txt │ │ ├── run_glue_deebert.py │ │ ├── src │ │ │ ├── __init__.py │ │ │ ├── modeling_highway_bert.py │ │ │ └── modeling_highway_roberta.py │ │ ├── test_glue_deebert.py │ │ └── train_deebert.sh │ ├── distillation │ │ ├── README.md │ │ ├── distiller.py │ │ ├── grouped_batch_sampler.py │ │ ├── lm_seqs_dataset.py │ │ ├── requirements.txt │ │ ├── run_squad_w_distillation.py │ │ ├── scripts │ │ │ ├── binarized_data.py │ │ │ ├── extract.py │ │ │ ├── extract_distilbert.py │ │ │ └── token_counts.py │ │ ├── train.py │ │ ├── training_configs │ │ │ ├── distilbert-base-cased.json │ │ │ ├── distilbert-base-multilingual-cased.json │ │ │ ├── distilbert-base-uncased.json │ │ │ ├── distilgpt2.json │ │ │ └── distilroberta-base.json │ │ └── utils.py │ ├── jax-projects │ │ ├── HOW_TO_PROPOSE_PROJECT.md │ │ └── README.md │ ├── longform-qa │ │ ├── README.md │ │ ├── eli5_app.py │ │ ├── eli5_utils.py │ │ └── requirements.txt │ ├── lxmert │ │ ├── README.md │ │ ├── demo.ipynb │ │ ├── extracting_data.py │ │ ├── modeling_frcnn.py │ │ ├── processing_image.py │ │ ├── requirements.txt │ │ ├── utils.py │ │ └── visualizing_image.py │ ├── mlm_wwm │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_chinese_ref.py │ │ └── run_mlm_wwm.py │ ├── mm-imdb │ │ ├── README.md │ │ ├── run_mmimdb.py │ │ └── utils_mmimdb.py │ ├── movement-pruning │ │ ├── README.md │ │ ├── Saving_PruneBERT.ipynb │ │ ├── bertarize.py │ │ ├── counts_parameters.py │ │ ├── emmental │ │ │ ├── __init__.py │ │ │ ├── configuration_bert_masked.py │ │ │ ├── modeling_bert_masked.py │ │ │ └── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── binarizer.py │ │ │ │ └── masked_nn.py │ │ ├── 
masked_run_glue.py │ │ ├── masked_run_squad.py │ │ └── requirements.txt │ ├── performer │ │ ├── README.md │ │ ├── full_script.sh │ │ ├── modeling_flax_performer.py │ │ ├── modeling_flax_performer_utils.py │ │ ├── run_mlm_performer.py │ │ └── sanity_script.sh │ ├── pplm │ │ ├── README.md │ │ ├── imgs │ │ │ ├── headfigure.png │ │ │ └── wooly.png │ │ ├── pplm_classification_head.py │ │ ├── requirements.txt │ │ ├── run_pplm.py │ │ └── run_pplm_discrim_train.py │ ├── rag-end2end-retriever │ │ ├── README.md │ │ ├── callbacks_rag.py │ │ ├── distributed_ray_retriever.py │ │ ├── eval_rag.py │ │ ├── finetune_rag.py │ │ ├── finetune_rag_ray_end2end.sh │ │ ├── kb_encode_utils.py │ │ ├── lightning_base.py │ │ ├── requirements.txt │ │ ├── test_run │ │ │ ├── dummy-kb │ │ │ │ └── my_knowledge_dataset.csv │ │ │ ├── dummy-train-data │ │ │ │ ├── train.source │ │ │ │ ├── train.target │ │ │ │ ├── val.source │ │ │ │ └── val.target │ │ │ ├── test_finetune.sh │ │ │ └── test_rag_new_features.sh │ │ ├── use_own_knowledge_dataset.py │ │ └── utils_rag.py │ ├── rag │ │ ├── README.md │ │ ├── __init__.py │ │ ├── _test_finetune_rag.py │ │ ├── callbacks_rag.py │ │ ├── consolidate_rag_checkpoint.py │ │ ├── distributed_pytorch_retriever.py │ │ ├── distributed_ray_retriever.py │ │ ├── eval_rag.py │ │ ├── finetune_rag.py │ │ ├── finetune_rag.sh │ │ ├── finetune_rag_ray.sh │ │ ├── lightning_base.py │ │ ├── parse_dpr_relevance_data.py │ │ ├── requirements.txt │ │ ├── test_data │ │ │ └── my_knowledge_dataset.csv │ │ ├── test_distributed_retriever.py │ │ ├── use_own_knowledge_dataset.py │ │ └── utils_rag.py │ ├── seq2seq-distillation │ │ ├── README.md │ │ ├── _test_bash_script.py │ │ ├── _test_make_student.py │ │ ├── _test_seq2seq_examples.py │ │ ├── _test_seq2seq_examples_multi_gpu.py │ │ ├── callbacks.py │ │ ├── convert_pl_checkpoint_to_hf.py │ │ ├── distil_marian_enro_teacher.sh │ │ ├── distil_marian_no_teacher.sh │ │ ├── distillation.py │ │ ├── dynamic_bs_example.sh │ │ ├── finetune.py │ │ ├── finetune.sh │ │ ├── finetune_bart_tiny.sh │ │ ├── finetune_pegasus_xsum.sh │ │ ├── finetune_t5.sh │ │ ├── lightning_base.py │ │ ├── make_student.py │ │ ├── precomputed_pseudo_labels.md │ │ ├── requirements.txt │ │ ├── run_eval.py │ │ ├── sentence_splitter.py │ │ ├── train_distilbart_cnn.sh │ │ ├── train_distilbart_xsum.sh │ │ ├── train_mbart_cc25_enro.sh │ │ └── utils.py │ ├── wav2vec2 │ │ ├── FINE_TUNE_XLSR_WAV2VEC2.md │ │ ├── README.md │ │ ├── ds_config_wav2vec2_zero2.json │ │ ├── ds_config_wav2vec2_zero3.json │ │ ├── finetune_base_100.sh │ │ ├── finetune_base_timit_asr.sh │ │ ├── finetune_large_lv60_100.sh │ │ ├── finetune_large_lv60_timit_asr.sh │ │ ├── finetune_large_xlsr_53_arabic_speech_corpus.sh │ │ ├── finetune_wav2vec2_xlsr_turkish.sh │ │ ├── requirements.txt │ │ ├── run_asr.py │ │ ├── run_common_voice.py │ │ ├── run_pretrain.py │ │ ├── test_wav2vec2_deepspeed.py │ │ └── vocab │ │ │ └── buckwalter.json │ └── zero-shot-distillation │ │ ├── README.md │ │ └── distill_classifier.py └── tensorflow │ ├── README.md │ ├── benchmarking │ ├── README.md │ ├── plot_csv_file.py │ ├── requirements.txt │ └── run_benchmark_tf.py │ ├── multiple-choice │ ├── README.md │ ├── requirements.txt │ ├── run_tf_multiple_choice.py │ └── utils_multiple_choice.py │ ├── question-answering │ ├── README.md │ ├── requirements.txt │ ├── run_qa.py │ └── utils_qa.py │ └── text-classification │ ├── README.md │ ├── requirements.txt │ ├── run_glue.py │ └── run_text_classification.py ├── exps ├── romanian_postprocess.sh ├── run_en_ro.sh ├── run_glue.sh └── 
run_xsum.sh ├── hubconf.py ├── img └── intro.png ├── model_cards └── README.md ├── notebooks ├── 01-training-tokenizers.ipynb ├── 02-transformers.ipynb ├── 03-pipelines.ipynb ├── 04-onnx-export.ipynb ├── 05-benchmark.ipynb └── README.md ├── petl ├── __init__.py ├── custom_callback.py ├── dynamic_batching.py ├── options.py ├── petl_enc_model.py ├── petl_encdec_model.py ├── petl_factory.py └── utils.py ├── pyproject.toml ├── scripts ├── check_tokenizers.py ├── fsmt │ ├── convert-allenai-wmt16.sh │ ├── convert-allenai-wmt19.sh │ ├── convert-facebook-wmt19.sh │ ├── eval-allenai-wmt16.sh │ ├── eval-allenai-wmt19.sh │ ├── eval-facebook-wmt19.sh │ ├── fsmt-make-super-tiny-model.py │ ├── fsmt-make-tiny-model.py │ ├── gen-card-allenai-wmt16.py │ ├── gen-card-allenai-wmt19.py │ ├── gen-card-facebook-wmt19.py │ ├── s3-move.sh │ └── tests-to-run.sh ├── pegasus │ └── build_test_sample_spm_no_bos.py ├── stale.py └── tatoeba │ ├── README.md │ └── upload_models.sh ├── setup.cfg ├── setup.py ├── src └── transformers │ ├── __init__.py │ ├── activations.py │ ├── activations_tf.py │ ├── benchmark │ ├── __init__.py │ ├── benchmark.py │ ├── benchmark_args.py │ ├── benchmark_args_tf.py │ ├── benchmark_args_utils.py │ ├── benchmark_tf.py │ └── benchmark_utils.py │ ├── commands │ ├── __init__.py │ ├── add_new_model.py │ ├── convert.py │ ├── download.py │ ├── env.py │ ├── lfs.py │ ├── run.py │ ├── serving.py │ ├── train.py │ ├── transformers_cli.py │ └── user.py │ ├── configuration_utils.py │ ├── convert_graph_to_onnx.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py │ ├── data │ ├── __init__.py │ ├── data_collator.py │ ├── datasets │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── language_modeling.py │ │ └── squad.py │ ├── metrics │ │ ├── __init__.py │ │ └── squad_metrics.py │ ├── processors │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ └── test_generation_utils.py │ ├── debug_utils.py │ ├── deepspeed.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── feature_extraction_sequence_utils.py │ ├── feature_extraction_utils.py │ ├── file_utils.py │ ├── generation_beam_search.py │ ├── generation_flax_logits_process.py │ ├── generation_flax_utils.py │ ├── generation_logits_process.py │ ├── generation_stopping_criteria.py │ ├── generation_tf_utils.py │ ├── generation_utils.py │ ├── hf_api.py │ ├── hf_argparser.py │ ├── image_utils.py │ ├── integrations.py │ ├── modelcard.py │ ├── modeling_flax_outputs.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_outputs.py │ ├── modeling_tf_outputs.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── albert │ │ ├── __init__.py │ │ ├── configuration_albert.py │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_albert.py │ │ ├── modeling_tf_albert.py │ │ ├── tokenization_albert.py │ │ └── tokenization_albert_fast.py │ ├── auto │ │ ├── __init__.py │ │ ├── auto_factory.py │ │ ├── configuration_auto.py │ │ ├── feature_extraction_auto.py │ │ ├── modeling_auto.py │ │ ├── modeling_flax_auto.py │ │ ├── modeling_tf_auto.py │ │ └── tokenization_auto.py │ ├── bart │ │ ├── __init__.py │ │ ├── configuration_bart.py │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_bart.py │ │ ├── modeling_flax_bart.py │ │ ├── modeling_tf_bart.py │ │ ├── 
tokenization_bart.py │ │ └── tokenization_bart_fast.py │ ├── barthez │ │ ├── __init__.py │ │ ├── tokenization_barthez.py │ │ └── tokenization_barthez_fast.py │ ├── bert │ │ ├── __init__.py │ │ ├── configuration_bert.py │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ │ ├── modeling_bert.py │ │ ├── modeling_flax_bert.py │ │ ├── modeling_tf_bert.py │ │ ├── tokenization_bert.py │ │ └── tokenization_bert_fast.py │ ├── bert_generation │ │ ├── __init__.py │ │ ├── configuration_bert_generation.py │ │ ├── modeling_bert_generation.py │ │ └── tokenization_bert_generation.py │ ├── bert_japanese │ │ ├── __init__.py │ │ └── tokenization_bert_japanese.py │ ├── bertweet │ │ ├── __init__.py │ │ └── tokenization_bertweet.py │ ├── big_bird │ │ ├── __init__.py │ │ ├── configuration_big_bird.py │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_big_bird.py │ │ ├── modeling_flax_big_bird.py │ │ ├── tokenization_big_bird.py │ │ └── tokenization_big_bird_fast.py │ ├── bigbird_pegasus │ │ ├── __init__.py │ │ ├── configuration_bigbird_pegasus.py │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py │ │ └── modeling_bigbird_pegasus.py │ ├── blenderbot │ │ ├── __init__.py │ │ ├── configuration_blenderbot.py │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_blenderbot.py │ │ ├── modeling_tf_blenderbot.py │ │ └── tokenization_blenderbot.py │ ├── blenderbot_small │ │ ├── __init__.py │ │ ├── configuration_blenderbot_small.py │ │ ├── modeling_blenderbot_small.py │ │ ├── modeling_tf_blenderbot_small.py │ │ ├── tokenization_blenderbot_small.py │ │ └── tokenization_blenderbot_small_fast.py │ ├── bort │ │ └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py │ ├── byt5 │ │ ├── __init__.py │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py │ │ └── tokenization_byt5.py │ ├── camembert │ │ ├── __init__.py │ │ ├── configuration_camembert.py │ │ ├── modeling_camembert.py │ │ ├── modeling_tf_camembert.py │ │ ├── tokenization_camembert.py │ │ └── tokenization_camembert_fast.py │ ├── clip │ │ ├── __init__.py │ │ ├── configuration_clip.py │ │ ├── convert_clip_original_pytorch_to_hf.py │ │ ├── feature_extraction_clip.py │ │ ├── modeling_clip.py │ │ ├── modeling_flax_clip.py │ │ ├── processing_clip.py │ │ ├── tokenization_clip.py │ │ └── tokenization_clip_fast.py │ ├── convbert │ │ ├── __init__.py │ │ ├── configuration_convbert.py │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py │ │ ├── modeling_convbert.py │ │ ├── modeling_tf_convbert.py │ │ ├── tokenization_convbert.py │ │ └── tokenization_convbert_fast.py │ ├── cpm │ │ ├── __init__.py │ │ └── tokenization_cpm.py │ ├── ctrl │ │ ├── __init__.py │ │ ├── configuration_ctrl.py │ │ ├── modeling_ctrl.py │ │ ├── modeling_tf_ctrl.py │ │ └── tokenization_ctrl.py │ ├── deberta │ │ ├── __init__.py │ │ ├── configuration_deberta.py │ │ ├── modeling_deberta.py │ │ ├── tokenization_deberta.py │ │ └── tokenization_deberta_fast.py │ ├── deberta_v2 │ │ ├── __init__.py │ │ ├── configuration_deberta_v2.py │ │ ├── modeling_deberta_v2.py │ │ └── tokenization_deberta_v2.py │ ├── deit │ │ ├── __init__.py │ │ ├── configuration_deit.py │ │ ├── convert_deit_timm_to_pytorch.py │ │ ├── feature_extraction_deit.py │ │ └── modeling_deit.py │ ├── detr │ │ ├── __init__.py │ │ ├── configuration_detr.py │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py │ │ ├── feature_extraction_detr.py │ 
│ └── modeling_detr.py │ ├── dialogpt │ │ ├── __init__.py │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── distilbert │ │ ├── __init__.py │ │ ├── configuration_distilbert.py │ │ ├── modeling_distilbert.py │ │ ├── modeling_tf_distilbert.py │ │ ├── tokenization_distilbert.py │ │ └── tokenization_distilbert_fast.py │ ├── dpr │ │ ├── __init__.py │ │ ├── configuration_dpr.py │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ ├── modeling_dpr.py │ │ ├── modeling_tf_dpr.py │ │ ├── tokenization_dpr.py │ │ └── tokenization_dpr_fast.py │ ├── electra │ │ ├── __init__.py │ │ ├── configuration_electra.py │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_electra.py │ │ ├── modeling_flax_electra.py │ │ ├── modeling_tf_electra.py │ │ ├── tokenization_electra.py │ │ └── tokenization_electra_fast.py │ ├── encoder_decoder │ │ ├── __init__.py │ │ ├── configuration_encoder_decoder.py │ │ └── modeling_encoder_decoder.py │ ├── flaubert │ │ ├── __init__.py │ │ ├── configuration_flaubert.py │ │ ├── modeling_flaubert.py │ │ ├── modeling_tf_flaubert.py │ │ └── tokenization_flaubert.py │ ├── fsmt │ │ ├── __init__.py │ │ ├── configuration_fsmt.py │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_fsmt.py │ │ └── tokenization_fsmt.py │ ├── funnel │ │ ├── __init__.py │ │ ├── configuration_funnel.py │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_funnel.py │ │ ├── modeling_tf_funnel.py │ │ ├── tokenization_funnel.py │ │ └── tokenization_funnel_fast.py │ ├── gpt2 │ │ ├── __init__.py │ │ ├── configuration_gpt2.py │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_flax_gpt2.py │ │ ├── modeling_gpt2.py │ │ ├── modeling_tf_gpt2.py │ │ ├── tokenization_gpt2.py │ │ └── tokenization_gpt2_fast.py │ ├── gpt_neo │ │ ├── __init__.py │ │ ├── configuration_gpt_neo.py │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py │ │ └── modeling_gpt_neo.py │ ├── herbert │ │ ├── __init__.py │ │ ├── tokenization_herbert.py │ │ └── tokenization_herbert_fast.py │ ├── hubert │ │ ├── __init__.py │ │ ├── configuration_hubert.py │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py │ │ └── modeling_hubert.py │ ├── ibert │ │ ├── __init__.py │ │ ├── configuration_ibert.py │ │ ├── modeling_ibert.py │ │ └── quant_modules.py │ ├── layoutlm │ │ ├── __init__.py │ │ ├── configuration_layoutlm.py │ │ ├── modeling_layoutlm.py │ │ ├── modeling_tf_layoutlm.py │ │ ├── tokenization_layoutlm.py │ │ └── tokenization_layoutlm_fast.py │ ├── led │ │ ├── __init__.py │ │ ├── configuration_led.py │ │ ├── modeling_led.py │ │ ├── modeling_tf_led.py │ │ ├── tokenization_led.py │ │ └── tokenization_led_fast.py │ ├── longformer │ │ ├── __init__.py │ │ ├── configuration_longformer.py │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ ├── modeling_longformer.py │ │ ├── modeling_tf_longformer.py │ │ ├── tokenization_longformer.py │ │ └── tokenization_longformer_fast.py │ ├── luke │ │ ├── __init__.py │ │ ├── configuration_luke.py │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_luke.py │ │ └── tokenization_luke.py │ ├── lxmert │ │ ├── __init__.py │ │ ├── configuration_lxmert.py │ │ ├── convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_lxmert.py │ │ ├── modeling_tf_lxmert.py │ │ ├── tokenization_lxmert.py │ │ └── tokenization_lxmert_fast.py │ ├── m2m_100 │ │ ├── __init__.py │ │ ├── configuration_m2m_100.py │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py │ │ ├── 
modeling_m2m_100.py │ │ └── tokenization_m2m_100.py │ ├── marian │ │ ├── __init__.py │ │ ├── configuration_marian.py │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ ├── convert_marian_to_pytorch.py │ │ ├── modeling_marian.py │ │ ├── modeling_tf_marian.py │ │ └── tokenization_marian.py │ ├── mbart │ │ ├── __init__.py │ │ ├── configuration_mbart.py │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py │ │ ├── modeling_mbart.py │ │ ├── modeling_tf_mbart.py │ │ ├── tokenization_mbart.py │ │ ├── tokenization_mbart50.py │ │ ├── tokenization_mbart50_fast.py │ │ └── tokenization_mbart_fast.py │ ├── megatron_bert │ │ ├── __init__.py │ │ ├── configuration_megatron_bert.py │ │ ├── convert_megatron_bert_checkpoint.py │ │ └── modeling_megatron_bert.py │ ├── megatron_gpt2 │ │ └── convert_megatron_gpt2_checkpoint.py │ ├── mmbt │ │ ├── __init__.py │ │ ├── configuration_mmbt.py │ │ └── modeling_mmbt.py │ ├── mobilebert │ │ ├── __init__.py │ │ ├── configuration_mobilebert.py │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_mobilebert.py │ │ ├── modeling_tf_mobilebert.py │ │ ├── tokenization_mobilebert.py │ │ └── tokenization_mobilebert_fast.py │ ├── mpnet │ │ ├── __init__.py │ │ ├── configuration_mpnet.py │ │ ├── modeling_mpnet.py │ │ ├── modeling_tf_mpnet.py │ │ ├── tokenization_mpnet.py │ │ └── tokenization_mpnet_fast.py │ ├── mt5 │ │ ├── __init__.py │ │ ├── configuration_mt5.py │ │ ├── modeling_mt5.py │ │ └── modeling_tf_mt5.py │ ├── openai │ │ ├── __init__.py │ │ ├── configuration_openai.py │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_openai.py │ │ ├── modeling_tf_openai.py │ │ ├── tokenization_openai.py │ │ └── tokenization_openai_fast.py │ ├── pegasus │ │ ├── __init__.py │ │ ├── configuration_pegasus.py │ │ ├── convert_pegasus_tf_to_pytorch.py │ │ ├── modeling_pegasus.py │ │ ├── modeling_tf_pegasus.py │ │ ├── tokenization_pegasus.py │ │ └── tokenization_pegasus_fast.py │ ├── phobert │ │ ├── __init__.py │ │ └── tokenization_phobert.py │ ├── prophetnet │ │ ├── __init__.py │ │ ├── configuration_prophetnet.py │ │ ├── convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_prophetnet.py │ │ └── tokenization_prophetnet.py │ ├── rag │ │ ├── __init__.py │ │ ├── configuration_rag.py │ │ ├── modeling_rag.py │ │ ├── modeling_tf_rag.py │ │ ├── retrieval_rag.py │ │ └── tokenization_rag.py │ ├── reformer │ │ ├── __init__.py │ │ ├── configuration_reformer.py │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ ├── modeling_reformer.py │ │ ├── tokenization_reformer.py │ │ └── tokenization_reformer_fast.py │ ├── retribert │ │ ├── __init__.py │ │ ├── configuration_retribert.py │ │ ├── modeling_retribert.py │ │ ├── tokenization_retribert.py │ │ └── tokenization_retribert_fast.py │ ├── roberta │ │ ├── __init__.py │ │ ├── configuration_roberta.py │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_flax_roberta.py │ │ ├── modeling_roberta.py │ │ ├── modeling_tf_roberta.py │ │ ├── tokenization_roberta.py │ │ └── tokenization_roberta_fast.py │ ├── roformer │ │ ├── __init__.py │ │ ├── configuration_roformer.py │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_roformer.py │ │ ├── modeling_tf_roformer.py │ │ ├── tokenization_roformer.py │ │ ├── tokenization_roformer_fast.py │ │ └── tokenization_utils.py │ ├── speech_to_text │ │ ├── __init__.py │ │ ├── configuration_speech_to_text.py │ │ ├── convert_s2t_fairseq_to_tfms.py │ │ ├── feature_extraction_speech_to_text.py │ │ ├── 
modeling_speech_to_text.py │ │ ├── processing_speech_to_text.py │ │ └── tokenization_speech_to_text.py │ ├── squeezebert │ │ ├── __init__.py │ │ ├── configuration_squeezebert.py │ │ ├── modeling_squeezebert.py │ │ ├── tokenization_squeezebert.py │ │ └── tokenization_squeezebert_fast.py │ ├── t5 │ │ ├── __init__.py │ │ ├── configuration_t5.py │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_flax_t5.py │ │ ├── modeling_t5.py │ │ ├── modeling_tf_t5.py │ │ ├── tokenization_t5.py │ │ └── tokenization_t5_fast.py │ ├── tapas │ │ ├── __init__.py │ │ ├── configuration_tapas.py │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tapas.py │ │ └── tokenization_tapas.py │ ├── transfo_xl │ │ ├── __init__.py │ │ ├── configuration_transfo_xl.py │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_transfo_xl.py │ │ ├── modeling_tf_transfo_xl_utilities.py │ │ ├── modeling_transfo_xl.py │ │ ├── modeling_transfo_xl_utilities.py │ │ └── tokenization_transfo_xl.py │ ├── visual_bert │ │ ├── __init__.py │ │ ├── configuration_visual_bert.py │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py │ │ └── modeling_visual_bert.py │ ├── vit │ │ ├── __init__.py │ │ ├── configuration_vit.py │ │ ├── convert_vit_timm_to_pytorch.py │ │ ├── feature_extraction_vit.py │ │ ├── modeling_flax_vit.py │ │ └── modeling_vit.py │ ├── wav2vec2 │ │ ├── __init__.py │ │ ├── configuration_wav2vec2.py │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py │ │ ├── feature_extraction_wav2vec2.py │ │ ├── modeling_tf_wav2vec2.py │ │ ├── modeling_wav2vec2.py │ │ ├── processing_wav2vec2.py │ │ └── tokenization_wav2vec2.py │ ├── xlm │ │ ├── __init__.py │ │ ├── configuration_xlm.py │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlm.py │ │ ├── modeling_xlm.py │ │ └── tokenization_xlm.py │ ├── xlm_prophetnet │ │ ├── __init__.py │ │ ├── configuration_xlm_prophetnet.py │ │ ├── modeling_xlm_prophetnet.py │ │ └── tokenization_xlm_prophetnet.py │ ├── xlm_roberta │ │ ├── __init__.py │ │ ├── configuration_xlm_roberta.py │ │ ├── modeling_tf_xlm_roberta.py │ │ ├── modeling_xlm_roberta.py │ │ ├── tokenization_xlm_roberta.py │ │ └── tokenization_xlm_roberta_fast.py │ └── xlnet │ │ ├── __init__.py │ │ ├── configuration_xlnet.py │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ ├── modeling_tf_xlnet.py │ │ ├── modeling_xlnet.py │ │ ├── tokenization_xlnet.py │ │ └── tokenization_xlnet_fast.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines │ ├── __init__.py │ ├── automatic_speech_recognition.py │ ├── base.py │ ├── conversational.py │ ├── feature_extraction.py │ ├── fill_mask.py │ ├── image_classification.py │ ├── question_answering.py │ ├── table_question_answering.py │ ├── text2text_generation.py │ ├── text_classification.py │ ├── text_generation.py │ ├── token_classification.py │ └── zero_shot_classification.py │ ├── sagemaker │ ├── __init__.py │ ├── trainer_sm.py │ └── training_args_sm.py │ ├── testing_utils.py │ ├── tokenization_utils.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_fast.py │ ├── trainer.py │ ├── trainer_callback.py │ ├── trainer_pt_utils.py │ ├── trainer_seq2seq.py │ ├── trainer_tf.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_seq2seq.py │ ├── training_args_tf.py │ └── utils │ ├── __init__.py │ ├── coco_classes.py │ ├── dummy_flax_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_sentencepiece_and_speech_objects.py │ ├── 
dummy_sentencepiece_and_tokenizers_objects.py │ ├── dummy_sentencepiece_objects.py │ ├── dummy_speech_objects.py │ ├── dummy_tf_objects.py │ ├── dummy_timm_and_vision_objects.py │ ├── dummy_timm_objects.py │ ├── dummy_tokenizers_objects.py │ ├── dummy_vision_objects.py │ ├── fx.py │ ├── hp_naming.py │ ├── imagenet_classes.py │ ├── logging.py │ ├── model_parallel_utils.py │ ├── modeling_auto_mapping.py │ ├── notebook.py │ ├── sentencepiece_model_pb2.py │ └── versions.py ├── templates ├── adding_a_new_example_script │ ├── README.md │ ├── cookiecutter.json │ └── {{cookiecutter.directory_name}} │ │ └── run_{{cookiecutter.example_shortcut}}.py └── adding_a_new_model │ ├── ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md │ ├── README.md │ ├── cookiecutter-template-{{cookiecutter.modelname}} │ ├── __init__.py │ ├── configuration.json │ ├── configuration_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py │ ├── test_modeling_{{cookiecutter.lowercase_modelname}}.py │ ├── to_replace_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_fast_{{cookiecutter.lowercase_modelname}}.py │ ├── tokenization_{{cookiecutter.lowercase_modelname}}.py │ └── {{cookiecutter.lowercase_modelname}}.rst │ ├── cookiecutter.json │ ├── open_model_proposals │ ├── ADD_BIG_BIRD.md │ └── README.md │ └── tests │ ├── encoder-bert-tokenizer.json │ ├── pt-encoder-bert-tokenizer.json │ ├── pt-seq-2-seq-bart-tokenizer.json │ ├── standalone.json │ ├── tf-encoder-bert-tokenizer.json │ └── tf-seq-2-seq-bart-tokenizer.json ├── tests ├── __init__.py ├── conftest.py ├── deepspeed │ ├── ds_config_zero2.json │ ├── ds_config_zero3.json │ └── test_deepspeed.py ├── extended │ └── test_trainer_ext.py ├── fixtures │ ├── dummy-config.json │ ├── dummy_feature_extractor_config.json │ ├── empty.txt │ ├── input.txt │ ├── preprocessor_config.json │ ├── sample_text.txt │ ├── sample_text_no_unicode.txt │ ├── spiece.model │ ├── test_sentencepiece.model │ ├── test_sentencepiece_bpe.model │ ├── test_sentencepiece_no_bos.model │ └── tests_samples │ │ ├── .gitignore │ │ ├── COCO │ │ ├── 000000039769.png │ │ ├── coco_annotations.txt │ │ ├── coco_panoptic │ │ │ └── 000000039769.png │ │ └── coco_panoptic_annotations.txt │ │ ├── GermEval │ │ ├── dev.txt │ │ ├── labels.txt │ │ └── train.txt │ │ ├── MRPC │ │ ├── dev.csv │ │ ├── dev.tsv │ │ ├── train.csv │ │ └── train.tsv │ │ ├── SQUAD │ │ └── sample.json │ │ ├── STS-B │ │ ├── dev.tsv │ │ └── train.tsv │ │ ├── conll │ │ └── sample.json │ │ ├── swag │ │ └── sample.json │ │ ├── wiki_text │ │ └── wiki_00 │ │ ├── wmt16 │ │ └── sample.json │ │ ├── wmt_en_ro │ │ ├── test.json │ │ ├── train.json │ │ └── val.json │ │ └── xsum │ │ └── sample.json ├── sagemaker │ ├── README.md │ ├── __init__.py │ ├── conftest.py │ ├── scripts │ │ ├── pytorch │ │ │ ├── requirements.txt │ │ │ ├── run_ddp.py │ │ │ └── run_glue_model_parallelism.py │ │ └── tensorflow │ │ │ ├── requirements.txt │ │ │ ├── run_tf.py │ │ │ └── run_tf_dist.py │ ├── test_multi_node_data_parallel.py │ ├── test_multi_node_model_parallel.py │ └── test_single_node_gpu.py ├── test_activations.py ├── test_activations_tf.py ├── test_benchmark.py ├── test_benchmark_tf.py ├── test_cli.py ├── test_configuration_auto.py ├── test_configuration_common.py ├── test_data_collator.py ├── test_doc_samples.py ├── test_feature_extraction_auto.py ├── test_feature_extraction_clip.py ├── test_feature_extraction_common.py ├── 
test_feature_extraction_deit.py ├── test_feature_extraction_detr.py ├── test_feature_extraction_speech_to_text.py ├── test_feature_extraction_vit.py ├── test_feature_extraction_wav2vec2.py ├── test_file_utils.py ├── test_flax_auto.py ├── test_generation_beam_search.py ├── test_generation_flax_logits_process.py ├── test_generation_flax_utils.py ├── test_generation_logits_process.py ├── test_generation_stopping_criteria.py ├── test_generation_utils.py ├── test_hf_api.py ├── test_hf_argparser.py ├── test_image_utils.py ├── test_logging.py ├── test_model_card.py ├── test_model_output.py ├── test_modeling_albert.py ├── test_modeling_auto.py ├── test_modeling_bart.py ├── test_modeling_bert.py ├── test_modeling_bert_generation.py ├── test_modeling_big_bird.py ├── test_modeling_bigbird_pegasus.py ├── test_modeling_blenderbot.py ├── test_modeling_blenderbot_small.py ├── test_modeling_bort.py ├── test_modeling_camembert.py ├── test_modeling_clip.py ├── test_modeling_common.py ├── test_modeling_convbert.py ├── test_modeling_ctrl.py ├── test_modeling_deberta.py ├── test_modeling_deberta_v2.py ├── test_modeling_deit.py ├── test_modeling_detr.py ├── test_modeling_distilbert.py ├── test_modeling_dpr.py ├── test_modeling_electra.py ├── test_modeling_encoder_decoder.py ├── test_modeling_flaubert.py ├── test_modeling_flax_bart.py ├── test_modeling_flax_bert.py ├── test_modeling_flax_big_bird.py ├── test_modeling_flax_clip.py ├── test_modeling_flax_common.py ├── test_modeling_flax_electra.py ├── test_modeling_flax_gpt2.py ├── test_modeling_flax_roberta.py ├── test_modeling_flax_t5.py ├── test_modeling_flax_vit.py ├── test_modeling_fsmt.py ├── test_modeling_funnel.py ├── test_modeling_gpt2.py ├── test_modeling_gpt_neo.py ├── test_modeling_hubert.py ├── test_modeling_ibert.py ├── test_modeling_layoutlm.py ├── test_modeling_led.py ├── test_modeling_longformer.py ├── test_modeling_luke.py ├── test_modeling_lxmert.py ├── test_modeling_m2m_100.py ├── test_modeling_marian.py ├── test_modeling_mbart.py ├── test_modeling_megatron_bert.py ├── test_modeling_megatron_gpt2.py ├── test_modeling_mobilebert.py ├── test_modeling_mpnet.py ├── test_modeling_mt5.py ├── test_modeling_openai.py ├── test_modeling_pegasus.py ├── test_modeling_prophetnet.py ├── test_modeling_rag.py ├── test_modeling_reformer.py ├── test_modeling_roberta.py ├── test_modeling_roformer.py ├── test_modeling_speech_to_text.py ├── test_modeling_squeezebert.py ├── test_modeling_t5.py ├── test_modeling_tapas.py ├── test_modeling_tf_albert.py ├── test_modeling_tf_auto.py ├── test_modeling_tf_bart.py ├── test_modeling_tf_bert.py ├── test_modeling_tf_blenderbot.py ├── test_modeling_tf_blenderbot_small.py ├── test_modeling_tf_bort.py ├── test_modeling_tf_camembert.py ├── test_modeling_tf_common.py ├── test_modeling_tf_convbert.py ├── test_modeling_tf_ctrl.py ├── test_modeling_tf_distilbert.py ├── test_modeling_tf_dpr.py ├── test_modeling_tf_electra.py ├── test_modeling_tf_flaubert.py ├── test_modeling_tf_funnel.py ├── test_modeling_tf_gpt2.py ├── test_modeling_tf_layoutlm.py ├── test_modeling_tf_led.py ├── test_modeling_tf_longformer.py ├── test_modeling_tf_lxmert.py ├── test_modeling_tf_marian.py ├── test_modeling_tf_mbart.py ├── test_modeling_tf_mobilebert.py ├── test_modeling_tf_mpnet.py ├── test_modeling_tf_mt5.py ├── test_modeling_tf_openai.py ├── test_modeling_tf_pegasus.py ├── test_modeling_tf_pytorch.py ├── test_modeling_tf_rag.py ├── test_modeling_tf_roberta.py ├── test_modeling_tf_roformer.py ├── test_modeling_tf_t5.py ├── 
test_modeling_tf_transfo_xl.py ├── test_modeling_tf_wav2vec2.py ├── test_modeling_tf_xlm.py ├── test_modeling_tf_xlm_roberta.py ├── test_modeling_tf_xlnet.py ├── test_modeling_transfo_xl.py ├── test_modeling_visual_bert.py ├── test_modeling_vit.py ├── test_modeling_wav2vec2.py ├── test_modeling_xlm.py ├── test_modeling_xlm_prophetnet.py ├── test_modeling_xlm_roberta.py ├── test_modeling_xlnet.py ├── test_offline.py ├── test_onnx.py ├── test_optimization.py ├── test_optimization_tf.py ├── test_pipelines_automatic_speech_recognition.py ├── test_pipelines_common.py ├── test_pipelines_conversational.py ├── test_pipelines_feature_extraction.py ├── test_pipelines_fill_mask.py ├── test_pipelines_image_classification.py ├── test_pipelines_question_answering.py ├── test_pipelines_summarization.py ├── test_pipelines_table_question_answering.py ├── test_pipelines_text2text_generation.py ├── test_pipelines_text_classification.py ├── test_pipelines_text_generation.py ├── test_pipelines_token_classification.py ├── test_pipelines_translation.py ├── test_pipelines_zero_shot.py ├── test_processor_clip.py ├── test_processor_speech_to_text.py ├── test_processor_wav2vec2.py ├── test_retrieval_rag.py ├── test_sequence_feature_extraction_common.py ├── test_skip_decorators.py ├── test_tokenization_albert.py ├── test_tokenization_auto.py ├── test_tokenization_bart.py ├── test_tokenization_barthez.py ├── test_tokenization_bert.py ├── test_tokenization_bert_generation.py ├── test_tokenization_bert_japanese.py ├── test_tokenization_bertweet.py ├── test_tokenization_big_bird.py ├── test_tokenization_blenderbot.py ├── test_tokenization_byt5.py ├── test_tokenization_camembert.py ├── test_tokenization_clip.py ├── test_tokenization_common.py ├── test_tokenization_cpm.py ├── test_tokenization_ctrl.py ├── test_tokenization_deberta.py ├── test_tokenization_deberta_v2.py ├── test_tokenization_distilbert.py ├── test_tokenization_dpr.py ├── test_tokenization_fast.py ├── test_tokenization_fsmt.py ├── test_tokenization_funnel.py ├── test_tokenization_gpt2.py ├── test_tokenization_herbert.py ├── test_tokenization_layoutlm.py ├── test_tokenization_luke.py ├── test_tokenization_lxmert.py ├── test_tokenization_m2m_100.py ├── test_tokenization_marian.py ├── test_tokenization_mbart.py ├── test_tokenization_mbart50.py ├── test_tokenization_mpnet.py ├── test_tokenization_openai.py ├── test_tokenization_pegasus.py ├── test_tokenization_phobert.py ├── test_tokenization_prophetnet.py ├── test_tokenization_rag.py ├── test_tokenization_reformer.py ├── test_tokenization_roberta.py ├── test_tokenization_roformer.py ├── test_tokenization_small_blenderbot.py ├── test_tokenization_speech_to_text.py ├── test_tokenization_squeezebert.py ├── test_tokenization_t5.py ├── test_tokenization_tapas.py ├── test_tokenization_transfo_xl.py ├── test_tokenization_utils.py ├── test_tokenization_wav2vec2.py ├── test_tokenization_xlm.py ├── test_tokenization_xlm_prophetnet.py ├── test_tokenization_xlm_roberta.py ├── test_tokenization_xlnet.py ├── test_trainer.py ├── test_trainer_callback.py ├── test_trainer_distributed.py ├── test_trainer_seq2seq.py ├── test_trainer_tpu.py ├── test_trainer_utils.py ├── test_utils_check_copies.py └── test_versions_utils.py ├── tride ├── controller.py ├── notebook │ └── vis_sentiment_neuron.ipynb ├── openai_sentiment_neuron │ ├── __init__.py │ ├── encoder.py │ ├── sst_binary_demo.py │ └── utils.py └── scripts │ ├── encode_sst.py │ ├── encode_text.py │ ├── generate_text.py │ ├── sklearn_lr_sst.py │ └── visualize_hid.py ├── utils ├── 
check_copies.py ├── check_dummies.py ├── check_inits.py ├── check_repo.py ├── check_table.py ├── check_tf_ops.py ├── class_mapping_update.py ├── custom_init_isort.py ├── download_glue_data.py ├── get_modified_files.py ├── link_tester.py ├── notification_service.py ├── release.py ├── style_doc.py └── tf_ops │ └── onnx.json └── valohai.yaml

/.coveragerc:
--------------------------------------------------------------------------------
[run]
source=transformers
omit =
    # skip conversion scripts from testing for now
    */convert_*
    */__main__.py
[report]
exclude_lines =
    pragma: no cover
    raise
    except
    register_parameter
--------------------------------------------------------------------------------
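The `[run]` section scopes measurement to the `transformers` package and skips the conversion scripts; `[report]` suppresses defensive lines from the totals. A minimal sketch of exercising this config programmatically with coverage.py — the import under measurement is illustrative, CI would normally run the test suite instead:

```python
import coverage

# Sketch, not the project's CI invocation: load the .coveragerc above,
# measure some code, and print a report that honors its settings.
cov = coverage.Coverage(config_file=".coveragerc")
cov.start()

import transformers  # stand-in for the code paths a real test run would touch

cov.stop()
cov.save()
cov.report()  # omits */convert_* and */__main__.py per the [run] section
```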
/.gitattributes:
--------------------------------------------------------------------------------
*.py eol=lf
*.rst eol=lf
*.md eol=lf
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/---new-benchmark.md:
--------------------------------------------------------------------------------
---
name: "\U0001F5A5 New benchmark"
about: Benchmark a part of this library and share your results
title: "[Benchmark]"
labels: ''
assignees: ''

---

# 🖥 Benchmarking `transformers`

## Benchmark

Which part of `transformers` did you benchmark?

## Set-up

What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use?

## Results

Put your results here!
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/--new-model-addition.md:
--------------------------------------------------------------------------------
---
name: "\U0001F31F New model addition"
about: Submit a proposal/request to implement a new Transformer-based model
title: ''
labels: New model
assignees: ''

---

# 🌟 New model addition

## Model description

## Open source status

* [ ] the model implementation is available: (give details)
* [ ] the model weights are available: (give details)
* [ ] who are the authors: (mention them, if possible by @gh-username)
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680 Feature request"
about: Submit a proposal/request for a new transformers feature
title: ''
labels: ''
assignees: ''

---

# 🚀 Feature request

## Motivation

## Your contribution
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question-help.md:
--------------------------------------------------------------------------------
---
name: "❓ Questions & Help"
about: Post your general questions on the Hugging Face forum: https://discuss.huggingface.co/
title: ''
labels: ''
assignees: ''

---

# ❓ Questions & Help

## Details

**A link to original question on the forum**:
--------------------------------------------------------------------------------
/.github/conda/build.sh:
--------------------------------------------------------------------------------
$PYTHON setup.py install  # Python command to install the script.
--------------------------------------------------------------------------------
/.github/conda/meta.yaml:
--------------------------------------------------------------------------------
{% set name = "transformers" %}

package:
  name: "{{ name|lower }}"
  version: "{{ TRANSFORMERS_VERSION }}"

source:
  path: ../../

build:
  noarch: python

requirements:
  host:
    - python
    - pip
    - numpy >=1.17
    - dataclasses
    - importlib_metadata
    - huggingface_hub
    - packaging
    - filelock
    - requests
    - tqdm >=4.27
    - sacremoses
    - regex !=2019.12.17
    - protobuf
    - tokenizers >=0.10.1,<0.11.0
    - pyyaml
  run:
    - python
    - numpy >=1.17
    - dataclasses
    - importlib_metadata
    - huggingface_hub
    - packaging
    - filelock
    - requests
    - tqdm >=4.27
    - sacremoses
    - regex !=2019.12.17
    - protobuf
    - tokenizers >=0.10.1,<0.11.0
    - pyyaml

test:
  imports:
    - transformers

about:
  home: https://huggingface.co
  license: Apache License 2.0
  license_file: LICENSE
  summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0."
--------------------------------------------------------------------------------
/.github/workflows/github-torch-hub.yml:
--------------------------------------------------------------------------------
name: Torch hub integration

on:
  push:
    branches:
      - "*"

jobs:
  torch_hub_integration:
    runs-on: ubuntu-latest
    env:
      # TODO quickfix but may need more investigation
      ACTIONS_ALLOW_UNSECURE_COMMANDS: True
    steps:
      # no checkout necessary here.
      - name: Extract branch name
        run: echo "::set-env name=BRANCH::${GITHUB_REF#refs/heads/}"
      - name: Check branch name
        run: echo $BRANCH
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.7

      - name: Loading cache
        uses: actions/cache@v2
        id: cache
        with:
          path: ~/.cache/pip
          key: v0-torch_hub-${{ hashFiles('setup.py') }}

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          # install torch-hub specific dependencies
          pip install -e git+https://github.com/huggingface/transformers.git#egg=transformers[torchhub]
          # no longer needed
          pip uninstall -y transformers

      #- name: Torch hub list
      #  run: |
      #    python -c "import torch; print(torch.hub.list('huggingface/transformers:$BRANCH'))"

      #- name: Torch hub help
      #  run: |
      #    python -c "import torch; print(torch.hub.help('huggingface/transformers:$BRANCH', 'modelForSequenceClassification'))"
--------------------------------------------------------------------------------
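The two commented-out steps show the integration this workflow was meant to exercise: entry points exposed by the repository's `hubconf.py`. A hedged sketch of the same calls from a Python session — the branch name and the `bert-base-uncased` checkpoint are illustrative assumptions, only the `modelForSequenceClassification` entry point comes from the steps above:

```python
import torch

# Entry points exposed by hubconf.py at the repo root (branch name illustrative).
print(torch.hub.list("huggingface/transformers:master"))

# Docstring of one entry point, as in the commented-out CI step above.
print(torch.hub.help("huggingface/transformers:master", "modelForSequenceClassification"))

# Loading through the entry point; the checkpoint name here is an assumption.
model = torch.hub.load(
    "huggingface/transformers:master",
    "modelForSequenceClassification",
    "bert-base-uncased",
)
```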
/.github/workflows/release-conda.yml:
--------------------------------------------------------------------------------
name: Release - Conda

on:
  push:
    tags:
      - v*
    branches:
      - conda_*

env:
  ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }}

jobs:
  build_and_package:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v1

      - name: Install miniconda
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          auto-activate-base: false
          python-version: 3.8
          activate-environment: "build-transformers"
          channels: huggingface

      - name: Setup conda env
        run: |
          conda install -c defaults anaconda-client conda-build

      - name: Extract version
        run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV

      - name: Build conda packages
        run: |
          conda info
          conda list
          conda-build .github/conda

      - name: Upload to Anaconda
        run: anaconda upload `conda-build .github/conda --output` --force
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
name: Stale Bot

on:
  schedule:
    - cron: "0 15 * * *"

jobs:
  close_stale_issues:
    name: Close Stale Issues
    if: github.repository == 'huggingface/transformers'
    runs-on: ubuntu-latest
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.7

      - name: Install requirements
        run: |
          pip install PyGithub
      - name: Close stale issues
        run: |
          python scripts/stale.py
--------------------------------------------------------------------------------
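The `scripts/stale.py` the workflow invokes is not reproduced in this dump. A minimal sketch of the kind of logic such a bot needs, using the PyGithub package the workflow installs — the repository name is real, while the 30-day threshold, the comment text, and closing behavior are illustrative assumptions:

```python
import os
from datetime import datetime, timedelta

from github import Github  # PyGithub, installed by the workflow above

gh = Github(os.environ["GITHUB_TOKEN"])
repo = gh.get_repo("huggingface/transformers")
cutoff = datetime.utcnow() - timedelta(days=30)  # illustrative threshold

for issue in repo.get_issues(state="open"):
    # get_issues also yields pull requests; skip them, and compare
    # PyGithub's naive UTC timestamps against the cutoff.
    if issue.pull_request is None and issue.updated_at < cutoff:
        issue.create_comment("This issue has been automatically marked as stale.")
        issue.edit(state="closed")
```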
/MANIFEST.in:
--------------------------------------------------------------------------------
include LICENSE
--------------------------------------------------------------------------------
/docker/transformers-cpu/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        tensorflow-cpu \
        torch

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-gpu/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        tensorflow \
        torch

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
    python3 setup.py install && \
    pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-pytorch-cpu/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        jupyter \
        torch

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-pytorch-gpu/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
LABEL maintainer="Hugging Face"
LABEL repository="transformers"

RUN apt update && \
    apt install -y bash \
                   build-essential \
                   git \
                   curl \
                   ca-certificates \
                   python3 \
                   python3-pip && \
    rm -rf /var/lib/apt/lists

RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        mkl \
        torch

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
    python3 setup.py install && \
    pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

WORKDIR /workspace
COPY . transformers/
RUN cd transformers/ && \
    python3 -m pip install --no-cache-dir .

CMD ["/bin/bash"]
--------------------------------------------------------------------------------
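Both GPU images build NVIDIA apex from source with its C++/CUDA extensions enabled. A sketch of the mixed-precision training loop apex was typically installed for — the model, optimizer, and opt level here are illustrative, not anything these Dockerfiles pin down:

```python
import torch
from apex import amp  # built from source in the images above

# Illustrative model/optimizer; apex patches them for mixed precision.
model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

loss = model(torch.randn(4, 10, device="cuda")).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:  # loss scaling for fp16
    scaled_loss.backward()
optimizer.step()
```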
/docker/transformers-pytorch-tpu/bert-base-cased.jsonnet:
--------------------------------------------------------------------------------
local base = import 'templates/base.libsonnet';
local tpus = import 'templates/tpus.libsonnet';
local utils = import "templates/utils.libsonnet";
local volumes = import "templates/volumes.libsonnet";

local bertBaseCased = base.BaseTest {
  frameworkPrefix: "hf",
  modelName: "bert-base-cased",
  mode: "example",
  configMaps: [],

  timeout: 3600, # 1 hour, in seconds

  image: std.extVar('image'),
  imageTag: std.extVar('image-tag'),

  tpuSettings+: {
    softwareVersion: "pytorch-nightly",
  },
  accelerator: tpus.v3_8,

  volumeMap+: {
    datasets: volumes.PersistentVolumeSpec {
      name: "huggingface-cluster-disk",
      mountPath: "/datasets",
    },
  },
  command: utils.scriptCommand(
    |||
      python -m pytest -s transformers/examples/pytorch/test_xla_examples.py -v
      test_exit_code=$?
      echo "\nFinished running commands.\n"
      test $test_exit_code -eq 0
    |||
  ),
};

bertBaseCased.oneshotJob
--------------------------------------------------------------------------------
/docker/transformers-pytorch-tpu/dataset.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
kind: PersistentVolume
metadata:
  name: huggingface-cluster-disk
spec:
  storageClassName: ""
  capacity:
    storage: 500Gi
  accessModes:
    - ReadOnlyMany
  claimRef:
    namespace: default
    name: huggingface-cluster-disk-claim
  gcePersistentDisk:
    pdName: huggingface-cluster-disk
    fsType: ext4
    readOnly: true
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: huggingface-cluster-disk-claim
spec:
  # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass.
  # A nil storageClassName value uses the default StorageClass. For details, see
  # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1
  storageClassName: ""
  accessModes:
    - ReadOnlyMany
  resources:
    requests:
      storage: 1Ki
--------------------------------------------------------------------------------
/docker/transformers-pytorch-tpu/docker-entrypoint.sh:
--------------------------------------------------------------------------------
#!/bin/bash
source ~/.bashrc
echo "running docker-entrypoint.sh"
conda activate container
echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS
echo "printed TPU info"
export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}"
exec "$@"
--------------------------------------------------------------------------------
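The entrypoint's only real work is deriving `XRT_TPU_CONFIG` from the TPU endpoint Kubernetes injects (`${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}` strips the leading `grpc://`). A minimal smoke test of the resulting setup, assuming `torch_xla` is available in the activated `container` env:

```python
# Sketch: verify the XRT_TPU_CONFIG exported by docker-entrypoint.sh is picked up.
import torch
import torch_xla.core.xla_model as xm

device = xm.xla_device()              # resolves a TPU core via XRT_TPU_CONFIG
t = torch.randn(2, 2, device=device)  # ops are staged on the XLA device
xm.mark_step()                        # flush the pending XLA graph
print(t.device)                       # e.g. xla:1
```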
For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" # ":7" strips the leading "grpc://" scheme from the endpoint address 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } 13 | 14 | .highlight .gp { 15 | color: #FB8D68; 16 | } -------------------------------------------------------------------------------- /docs/source/contributing.md: -------------------------------------------------------------------------------- 1 | ../../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/favicon.ico -------------------------------------------------------------------------------- /docs/source/imgs/course_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/course_banner.png -------------------------------------------------------------------------------- /docs/source/imgs/local_attention_mask.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/local_attention_mask.png -------------------------------------------------------------------------------- /docs/source/imgs/ppl_chunked.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_chunked.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_full.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_full.gif -------------------------------------------------------------------------------- /docs/source/imgs/ppl_sliding.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/ppl_sliding.gif -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/transformers_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/transformers_overview.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2020 The HuggingFace Team. All rights reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with 5 | the License. You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on 10 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the 11 | specific language governing permissions and limitations under the License. 12 | 13 | Configuration 14 | ----------------------------------------------------------------------------------------------------------------------- 15 | 16 | The base class :class:`~transformers.PretrainedConfig` implements the common methods for loading/saving a configuration 17 | either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded 18 | from HuggingFace's AWS S3 repository). 19 | 20 | 21 | PretrainedConfig 22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | .. autoclass:: transformers.PretrainedConfig 25 | :members: 26 | -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /docs/source/troubleshooting.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Troubleshooting 18 | 19 | This document is meant to help you find solutions to common problems. 20 | 21 | ## Firewalled environments 22 | 23 | Some cloud and intranet setups have their GPU instances firewalled to the outside world, so if your script is trying to download model weights or datasets it will first hang and then time out with an error message like: 24 | 25 | ``` 26 | ValueError: Connection error, and we cannot find the requested files in the cached path. 27 | Please try again or make sure your Internet connection is on. 28 | ``` 29 | 30 | One possible solution in this situation is to use the ["offline-mode"](https://huggingface.co/transformers/installation.html#offline-mode).
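As a minimal sketch of that approach (assuming the model weights and datasets were already downloaded into the local cache while a connection was available; the training command shown is an illustrative placeholder for whatever you normally run):

```bash
# Tell transformers and datasets to read everything from the local cache and
# never attempt a network call; a missing file should then surface an error
# immediately instead of hanging on a network timeout.
export TRANSFORMERS_OFFLINE=1
export HF_DATASETS_OFFLINE=1

python examples/pytorch/translation/run_translation.py --model_name_or_path t5-small ...
```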
31 | -------------------------------------------------------------------------------- /examples/flax/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.4 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/flax/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.4 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/legacy/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Legacy examples 18 | 19 | This folder contains examples which are not actively maintained (mostly contributed by the community). 20 | 21 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working. 22 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | ray 23 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_glue.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size $BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_ner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # for seqeval metrics import 4 | pip install -r ../requirements.txt 5 | 6 | ## The relevant files are currently on a shared Google 7 | ## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J 8 | ## Monitor for changes and eventually migrate to nlp dataset 9 | curl -L
'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \ 10 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 11 | curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \ 12 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 13 | curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \ 14 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 15 | 16 | export MAX_LENGTH=128 17 | export BERT_MODEL=bert-base-multilingual-cased 18 | python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 19 | python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 20 | python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 21 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 22 | export BATCH_SIZE=32 23 | export NUM_EPOCHS=3 24 | export SEED=1 25 | 26 | export OUTPUT_DIR_NAME=germeval-model 27 | export CURRENT_DIR=${PWD} 28 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 29 | mkdir -p $OUTPUT_DIR 30 | 31 | # Add parent directory to python path to access lightning_base.py 32 | export PYTHONPATH="../":"${PYTHONPATH}" 33 | 34 | python3 run_ner.py --data_dir ./ \ 35 | --labels ./labels.txt \ 36 | --model_name_or_path $BERT_MODEL \ 37 | --output_dir $OUTPUT_DIR \ 38 | --max_seq_length $MAX_LENGTH \ 39 | --num_train_epochs $NUM_EPOCHS \ 40 | --train_batch_size $BATCH_SIZE \ 41 | --seed $SEED \ 42 | --gpus 1 \ 43 | --do_train \ 44 | --do_predict 45 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_pos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/convert_model_to_fp16.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import Union 17 | 18 | import fire 19 | import torch 20 | from tqdm import tqdm 21 | 22 | 23 | def convert(src_path: str, map_location: str = "cpu", save_path: Union[str, None] = None) -> None: 24 | """Convert a pytorch_model.bin or model.pt file to torch.float16 for faster downloads, less disk space.""" 25 | state_dict = torch.load(src_path, map_location=map_location) 26 | for k, v in tqdm(state_dict.items()): 27 | if not isinstance(v, torch.Tensor): 28 | raise TypeError("FP16 conversion only works on paths that are saved state dicts, like pytorch_model.bin") 29 | state_dict[k] = v.half() 30 | if save_path is None: # overwrite src_path 31 | save_path = src_path 32 | torch.save(state_dict, save_path) 33 | 34 | 35 | if __name__ == "__main__": 36 | fire.Fire(convert) 37 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 16 | # run ./finetune.sh --help to see all the possible options 17 | python finetune_trainer.py \ 18 | --learning_rate=3e-5 \ 19 | --fp16 \ 20 | --do_train --do_eval --do_predict \ 21 | --evaluation_strategy steps \ 22 | --predict_with_generate \ 23 | --n_val 1000 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export TPU_NUM_CORES=8 16 | 17 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 18 | # run ./finetune_tpu.sh --help to see all the possible options 19 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 20 | finetune_trainer.py \ 21 | --learning_rate=3e-5 \ 22 | --do_train --do_eval \ 23 | --evaluation_strategy steps \ 24 | --prediction_loss_only \ 25 | --n_val 1000 \ 26 | "$@" 27 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from pathlib import Path 17 | 18 | import fire 19 | 20 | 21 | def minify(src_dir: str, dest_dir: str, n: int): 22 | """Write first n lines of each file f in src_dir to dest_dir/f""" 23 | src_dir = Path(src_dir) 24 | dest_dir = Path(dest_dir) 25 | dest_dir.mkdir(exist_ok=True) 26 | for path in src_dir.iterdir(): 27 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 28 | dest_path = dest_dir.joinpath(path.name) 29 | print(dest_path) 30 | dest_path.open("w").write("\n".join(new)) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(minify) 35 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/old_test_tatoeba_conversion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import tempfile 17 | import unittest 18 | 19 | from transformers.file_utils import cached_property 20 | from transformers.models.marian.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter 21 | from transformers.testing_utils import slow 22 | 23 | 24 | @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") 25 | class TatoebaConversionTester(unittest.TestCase): 26 | @cached_property 27 | def resolver(self): 28 | tmp_dir = tempfile.mkdtemp() 29 | return TatoebaConverter(save_dir=tmp_dir) 30 | 31 | @slow 32 | def test_resolver(self): 33 | self.resolver.convert_models(["heb-eng"]) 34 | 35 | @slow 36 | def test_model_card(self): 37 | content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) 38 | assert mmeta["long_pair"] == "heb-eng" 39 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import fire 16 | 17 | from utils import calculate_rouge, save_json 18 | 19 | 20 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 21 | """Kwargs will be passed to calculate_rouge""" 22 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 23 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 24 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 25 | if save_path is not None: 26 | save_json(metrics, save_path, indent=None) 27 | return metrics # these print nicely 28 | 29 | 30 | if __name__ == "__main__": 31 | fire.Fire(calculate_rouge_path) 32 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/save_randomly_initialized_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import fire 17 | 18 | from transformers import AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer 19 | 20 | 21 | def save_randomly_initialized_version(config_name: str, save_dir: str, **config_kwargs): 22 | """Save a randomly initialized version of a model using a pretrained config. 23 | Args: 24 | config_name: which config to use 25 | save_dir: where to save the resulting model and tokenizer 26 | config_kwargs: Passed to AutoConfig 27 | 28 | Usage:: 29 | save_randomly_initialized_version("facebook/bart-large-cnn", "distilbart_random_cnn_6_3", encoder_layers=6, decoder_layers=3, num_beams=3) 30 | """ 31 | cfg = AutoConfig.from_pretrained(config_name, **config_kwargs) 32 | model = AutoModelForSeq2SeqLM.from_config(cfg) 33 | model.save_pretrained(save_dir) 34 | AutoTokenizer.from_pretrained(config_name).save_pretrained(save_dir) 35 | return model 36 | 37 | 38 | if __name__ == "__main__": 39 | fire.Fire(save_randomly_initialized_version) 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import re 15 | 16 | from filelock import FileLock 17 | 18 | 19 | try: 20 | import nltk 21 | 22 | NLTK_AVAILABLE = True 23 | except (ImportError, ModuleNotFoundError): 24 | NLTK_AVAILABLE = False 25 | 26 | if NLTK_AVAILABLE: 27 | with FileLock(".lock") as lock: 28 | nltk.download("punkt", quiet=True) 29 | 30 | 31 | def add_newline_to_end_of_each_sentence(x: str) -> str: 32 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 33 | x = re.sub("<n>", "", x) # remove pegasus newline char 34 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)" 35 | return "\n".join(nltk.sent_tokenize(x)) 36 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/test_data: -------------------------------------------------------------------------------- 1 | seq2seq/test_data -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/legacy/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/legacy/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distil_marian_enro.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The
HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export WANDB_PROJECT=distil-marian 16 | export BS=64 17 | export GAS=1 18 | export m=sshleifer/student_marian_en_ro_6_3 19 | export MAX_LEN=128 MAX_TGT_LEN=128 20 | python finetune_trainer.py \ 21 | --tokenizer_name $m --model_name_or_path $m \ 22 | --data_dir $ENRO_DIR \ 23 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 24 | --learning_rate=3e-4 \ 25 | --warmup_steps 500 --sortish_sampler \ 26 | --fp16 \ 27 | --gradient_accumulation_steps=$GAS \ 28 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 29 | --freeze_encoder --freeze_embeds \ 30 | --num_train_epochs=6 \ 31 | --save_steps 3000 --eval_steps 3000 \ 32 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ 33 | --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 34 | --do_train --do_eval --do_predict \ 35 | --evaluation_strategy steps \ 36 | --predict_with_generate --logging_first_step \ 37 | --task translation --label_smoothing_factor 0.1 \ 38 | "$@" 39 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distil_marian_enro_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | export WANDB_PROJECT=distil-marian 16 | export BS=64 17 | export m=sshleifer/student_marian_en_ro_6_3 18 | export MAX_LEN=128 MAX_TGT_LEN=128 19 | export TPU_NUM_CORES=8 20 | 21 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 22 | finetune_trainer.py \ 23 | --tokenizer_name $m --model_name_or_path $m \ 24 | --data_dir $ENRO_DIR \ 25 | --output_dir marian_en_ro_6_3 --overwrite_output_dir \ 26 | --learning_rate=3e-4 \ 27 | --warmup_steps 500 \ 28 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 29 | --freeze_encoder --freeze_embeds \ 30 | --num_train_epochs=6 \ 31 | --save_steps 500 --eval_steps 500 \ 32 | --logging_first_step --logging_steps 200 \ 33 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN \ 34 | --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 35 | --do_train --do_eval \ 36 | --evaluation_strategy steps \ 37 | --prediction_loss_only \ 38 | --task translation --label_smoothing_factor 0.1 \ 39 | "$@" 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export WANDB_PROJECT=distilbart-trainer 16 | export BS=32 17 | export m=sshleifer/student_cnn_12_6 18 | export tok=facebook/bart-large 19 | export MAX_TGT_LEN=142 20 | 21 | python finetune_trainer.py \ 22 | --model_name_or_path $m --tokenizer_name $tok \ 23 | --data_dir cnn_dm \ 24 | --output_dir distilbart-cnn-12-6 --overwrite_output_dir \ 25 | --learning_rate=3e-5 \ 26 | --warmup_steps 500 --sortish_sampler \ 27 | --fp16 \ 28 | --n_val 500 \ 29 | --gradient_accumulation_steps=1 \ 30 | --per_device_train_batch_size=$BS --per_device_eval_batch_size=$BS \ 31 | --freeze_encoder --freeze_embeds \ 32 | --num_train_epochs=2 \ 33 | --save_steps 3000 --eval_steps 3000 \ 34 | --logging_first_step \ 35 | --max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \ 36 | --do_train --do_eval --do_predict \ 37 | --evaluation_strategy steps \ 38 | --predict_with_generate --sortish_sampler \ 39 | "$@" 40 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python finetune_trainer.py \ 16 | --model_name_or_path=facebook/mbart-large-cc25 \ 17 | --data_dir $ENRO_DIR \ 18 | --output_dir mbart_cc25_enro --overwrite_output_dir \ 19 | --learning_rate=3e-5 \ 20 | --warmup_steps 500 \ 21 | --fp16 \ 22 | --label_smoothing 0.1 \ 23 | --adam_eps 1e-06 \ 24 | --src_lang en_XX --tgt_lang ro_RO \ 25 | --freeze_embeds \ 26 | --per_device_train_batch_size=4 --per_device_eval_batch_size=4 \ 27 | --max_source_length 128 --max_target_length 128 --val_max_target_length 128 --test_max_target_length 128\ 28 | --sortish_sampler \ 29 | --num_train_epochs 6 \ 30 | --save_steps 25000 --eval_steps 25000 --logging_steps 1000 \ 31 | --do_train --do_eval --do_predict \ 32 | --evaluation_strategy steps \ 33 | --predict_with_generate --logging_first_step \ 34 | --task translation \ 35 | "$@" 36 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | ## The relevant files are currently on a shared Google 2 | ## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J 3 | ## Monitor for changes and eventually migrate to nlp dataset 4 | curl -L 'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \ 5 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 6 | curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \ 7 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 8 | curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \ 9 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 10 | 11 | export MAX_LENGTH=128 12 | export BERT_MODEL=bert-base-multilingual-cased 13 | python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 14 | python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 15 | python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 16 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 17 | export OUTPUT_DIR=germeval-model 18 | export BATCH_SIZE=32 19 | export NUM_EPOCHS=3 20 | export SAVE_STEPS=750 21 | export SEED=1 22 | 23 | python3 run_ner.py \ 24 | --task_type NER \ 25 | --data_dir . \ 26 | --labels ./labels.txt \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 
3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type Chunk \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type POS \ 26 | --data_dir . 
\ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /examples/pytorch/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu >= 1.4.12 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/pytorch/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # 🤗 Benchmark results 18 | 19 | Here, you can find a list of the different benchmark results created by the community. 20 | 21 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add it below. 
22 | 23 | | Benchmark description | Results | Environment info | Author | 24 | |:----------|:-------------|:-------------|------:| 25 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 26 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 27 | -------------------------------------------------------------------------------- /examples/pytorch/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.3 -------------------------------------------------------------------------------- /examples/pytorch/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | 29 | # silence FutureWarning warnings in tests since often we can't act on them until 30 | # they become normal warnings - i.e.
the tests still need to test the current functionality 31 | warnings.simplefilter(action="ignore", category=FutureWarning) 32 | 33 | 34 | def pytest_addoption(parser): 35 | from transformers.testing_utils import pytest_addoption_shared 36 | 37 | pytest_addoption_shared(parser) 38 | 39 | 40 | def pytest_terminal_summary(terminalreporter): 41 | from transformers.testing_utils import pytest_terminal_summary_main 42 | 43 | make_reports = terminalreporter.config.getoption("--make-reports") 44 | if make_reports: 45 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 46 | -------------------------------------------------------------------------------- /examples/pytorch/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.3 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | accelerate launch run_swag_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name swag \ 18 | --output_dir /tmp/test-swag-no-trainer \ 19 | --pad_to_max_length 20 | -------------------------------------------------------------------------------- /examples/pytorch/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.3.0 3 | -------------------------------------------------------------------------------- /examples/pytorch/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | rouge-score 5 | nltk 6 | py7zr 7 | torch >= 1.3 8 | -------------------------------------------------------------------------------- /examples/pytorch/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | torch >= 1.3 6 | -------------------------------------------------------------------------------- /examples/pytorch/text-generation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Language generation 18 | 19 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/pytorch/text-generation/run_generation.py). 
20 | 21 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 22 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you 23 | can try out the different models available in the library. 24 | 25 | Example usage: 26 | 27 | ```bash 28 | python run_generation.py \ 29 | --model_type=gpt2 \ 30 | --model_name_or_path=gpt2 31 | ``` 32 | -------------------------------------------------------------------------------- /examples/pytorch/text-generation/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.8.0 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python3 run_ner.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --do_train \ 20 | --do_eval 21 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | accelerate launch run_ner_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --pad_to_max_length \ 20 | --task_name ner \ 21 | --return_entity_level_metrics 22 | -------------------------------------------------------------------------------- /examples/pytorch/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | sacrebleu >= 1.4.12 5 | py7zr 6 | torch >= 1.3 7 | -------------------------------------------------------------------------------- /examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Research projects 18 | 19 | This folder contains various research projects using 🤗 Transformers. They are not maintained and require a specific 20 | version of 🤗 Transformers that is indicated in the requirements file of each folder. Updating them to the most recent version of the library will require some work. 21 | 22 | To use any of them, just run the command 23 | ``` 24 | pip install -r requirements.txt 25 | ``` 26 | inside the folder of your choice. 27 | 28 | If you need help with any of those, contact the author(s), indicated at the top of the `README` of each folder. 29 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/README.md: -------------------------------------------------------------------------------- 1 | ## Adversarial evaluation of model performance 2 | 3 | Here is an example of evaluating a model using adversarial evaluation of natural language inference with the Heuristic Analysis for NLI Systems (HANS) dataset [McCoy et al., 2019](https://arxiv.org/abs/1902.01007). The example was graciously provided by [Nafise Sadat Moosavi](https://github.com/ns-moosavi). 4 | 5 | The HANS dataset can be downloaded from [this location](https://github.com/tommccoy1/hans). 6 | 7 | This is an example of using `run_hans.py`: 8 | 9 | ```bash 10 | export HANS_DIR=path-to-hans 11 | export MODEL_TYPE=type-of-the-model-e.g.-bert-roberta-xlnet-etc 12 | export MODEL_PATH=path-to-the-model-directory-that-is-trained-on-NLI-e.g.-by-using-run_glue.py 13 | 14 | python run_hans.py \ 15 | --task_name hans \ 16 | --model_type $MODEL_TYPE \ 17 | --do_eval \ 18 | --data_dir $HANS_DIR \ 19 | --model_name_or_path $MODEL_PATH \ 20 | --max_seq_length 128 \ 21 | --output_dir $MODEL_PATH 22 | ``` 23 | 24 | This will create the `hans_predictions.txt` file in `MODEL_PATH`, which can then be evaluated using `evaluate_heur_output.py` from the HANS repository.
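As a rough sketch, the follow-up scoring step could look like the command below. The exact interface of `evaluate_heur_output.py` belongs to the external HANS repository and is an assumption here, so check that repository for the authoritative usage:

```bash
# Hypothetical scoring step: run the HANS repo's evaluation script on the
# predictions produced above (assumes the repo was cloned to $HANS_DIR).
python $HANS_DIR/evaluate_heur_output.py $MODEL_PATH/hans_predictions.txt
```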
25 | 26 | The results of the BERT-base model, trained on MNLI with batch size 8 and random seed 42, on the HANS dataset are as follows: 27 | 28 | ```bash 29 | Heuristic entailed results: 30 | lexical_overlap: 0.9702 31 | subsequence: 0.9942 32 | constituent: 0.9962 33 | 34 | Heuristic non-entailed results: 35 | lexical_overlap: 0.199 36 | subsequence: 0.0396 37 | constituent: 0.118 38 | ``` 39 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/bert-loses-patience/pabee/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from unittest.mock import patch 5 | 6 | import run_glue_with_pabee 7 | from transformers.testing_utils import TestCasePlus 8 | 9 | 10 | logging.basicConfig(level=logging.DEBUG) 11 | 12 | logger = logging.getLogger() 13 | 14 | 15 | def get_setup_file(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-f") 18 | args = parser.parse_args() 19 | return args.f 20 | 21 | 22 | class PabeeTests(TestCasePlus): 23 | def test_run_glue(self): 24 | stream_handler = logging.StreamHandler(sys.stdout) 25 | logger.addHandler(stream_handler) 26 | 27 | tmp_dir = self.get_auto_remove_tmp_dir() 28 | testargs = f""" 29 | run_glue_with_pabee.py 30 | --model_type albert 31 | --model_name_or_path albert-base-v2 32 | --data_dir ./tests/fixtures/tests_samples/MRPC/ 33 | --output_dir {tmp_dir} 34 | --overwrite_output_dir 35 | --task_name mrpc 36 | --do_train 37 | --do_eval 38 | --per_gpu_train_batch_size=2 39 | --per_gpu_eval_batch_size=1 40 | --learning_rate=2e-5 41 | --max_steps=50 42 | --warmup_steps=2 43 | --seed=42 44 | --max_seq_length=128 45 | """.split() 46 | 47 | with patch.object(sys, "argv", testargs): 48 | result = run_glue_with_pabee.main() 49 | for value in result.values(): 50 | self.assertGreaterEqual(value, 0.75) 51 | -------------------------------------------------------------------------------- /examples/research_projects/bertabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/bertabs/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 |
-------------------------------------------------------------------------------- /examples/research_projects/bertology/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/deebert/src/__init__.py -------------------------------------------------------------------------------- /examples/research_projects/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 
7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- 
/examples/research_projects/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | Author: @yjernite 4 | 5 | This folder contains the code for the Long Form Question Answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 6 | 7 | You can use these methods to train your own system by following along with the associated [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html). 8 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | faiss-cpu 3 | streamlit 4 | elasticsearch 5 | -------------------------------------------------------------------------------- /examples/research_projects/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. Make a virtualenv: ``virtualenv venv`` and activate it: ``source venv/bin/activate`` 4 | 2. Install the requirements: ``pip install -r ./requirements.txt`` 5 | 3. Usage is as shown in ``demo.ipynb`` 6 | -------------------------------------------------------------------------------- /examples/research_projects/mlm_wwm/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | ltp 5 | -------------------------------------------------------------------------------- /examples/research_projects/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/research_projects/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a multimodal dataset with around 26,000 movies including images, plots and other metadata.
6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /examples/research_projects/performer/README.md: -------------------------------------------------------------------------------- 1 | # Performer fine-tuning 2 | 3 | Example authors: @TevenLeScao, @Patrickvonplaten 4 | 5 | Paper authors: Krzysztof Choromanski, Valerii Likhosherstov, David Dohan, Xingyou Song, Andreea Gane, Tamas Sarlos, Peter Hawkins, Jared Davis, Afroz Mohiuddin, Lukasz Kaiser, David Belanger, Lucy Colwell, Adrian Weller 6 | 7 | ## Requirements 8 | 9 | `datasets`, `flax` and `jax`. `wandb` integration is built in if you want to use it. 10 | 11 | ## Examples 12 | 13 | `sanity_script.sh` will launch Performer fine-tuning from the bert-base-cased checkpoint on the Simple Wikipedia dataset (a small, easy-language English Wikipedia) from `datasets`. 14 | `full_script.sh` will launch Performer fine-tuning from the bert-large-cased checkpoint on the English Wikipedia dataset from `datasets`. 15 | 16 | Here are a few key arguments (combined into a runnable sketch after this list): 17 | - Remove the `--performer` argument to use a standard BERT model. 18 | 19 | - Add `--reinitialize` to start from a blank model rather than a BERT checkpoint. 20 | 21 | - You may change the BERT size by passing a different [checkpoint](https://huggingface.co/transformers/pretrained_models.html) to the `--model_name_or_path` argument. 22 | 23 | - Passing your username to the `--wandb_user_name` argument will trigger Weights & Biases logging. 24 | 25 | - You can choose a dataset with `--dataset_name` and `--dataset_config`. Our [viewer](https://huggingface.co/datasets/viewer/) will help you find what you need.
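As a rough sketch of how these flags fit together, the command below combines the base invocation from `sanity_script.sh` with the arguments above; the wandb username is a placeholder, and whether `--reinitialize` still expects a `--model_name_or_path` for its config is an assumption to verify against `run_mlm_performer.py`:

```bash
# Hypothetical run: Performer attention, reinitialized weights, W&B logging.
TOKENIZERS_PARALLELISM=true python run_mlm_performer.py \
    --output_dir experiments \
    --dataset_name wikipedia \
    --dataset_config_name 20200501.simple \
    --model_name_or_path bert-base-cased \
    --tokenizer_name bert-base-cased \
    --do_train \
    --overwrite_output_dir \
    --per_device_train_batch_size 4 \
    --learning_rate 5e-4 \
    --warmup_steps 100 \
    --num_train_epochs 3 \
    --performer \
    --reinitialize \
    --wandb_user_name your-username
```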
-------------------------------------------------------------------------------- /examples/research_projects/performer/full_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.en --model_name_or_path bert-large-cased --tokenizer_name bert-large-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/performer/sanity_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.simple --model_name_or_path bert-base-cased --tokenizer_name bert-base-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/examples/research_projects/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/research_projects/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class ClassificationHead(nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (nn.Linear(embed_size, class_size)) 13 | self.mlp = nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = nn.functional.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/research_projects/pplm/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==1.0.4 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | transformers==3.5.1 23 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/requirements.txt: 
-------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.7.0 2 | datasets >= 1.6.2 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | pytorch-lightning == 1.3.1 6 | nvidia-ml-py3 == 7.352.0 7 | ray >= 1.3.0 8 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/train.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? 9 | What does Moses' rod turn into ? 10 | Who is Aron? 11 | Where did Moses grow up ? 12 | What happens at the command of the Moses ? 13 | Who manages the Pokémon ? 14 | Who owned the Pokémon trademark ? 15 | What else include in Pokémon franchise ? 16 | How many seasons in Pokémon animme series ? 17 | What does Moses' rod turn into ? 18 | Who is Aron? 19 | Where did Moses grow up ? 20 | What happens at the command of the Moses ? 21 | Who manages the Pokémon ? 22 | Who owned the Pokémon trademark ? 23 | What else include in Pokémon franchise ? 24 | How many seasons in Pokémon animme series ? 25 | What does Moses' rod turn into ? 26 | Who is Aron? 27 | Where did Moses grow up ? 28 | What happens at the command of the Moses ? 29 | Who manages the Pokémon ? 30 | Who owned the Pokémon trademark ? 31 | What else include in Pokémon franchise ? 32 | How many seasons in Pokémon animme series ? 33 | What does Moses' rod turn into ? 34 | Who is Aron? 35 | Where did Moses grow up ? 36 | What happens at the command of the Moses ? 37 | Who manages the Pokémon ? 38 | Who owned the Pokémon trademark ? 39 | What else include in Pokémon franchise ? 40 | How many seasons in Pokémon animme series ? 41 | What does Moses' rod turn into ? 42 | Who is Aron? 43 | Where did Moses grow up ? 44 | What happens at the command of the Moses ? 45 | Who manages the Pokémon ? 46 | Who owned the Pokémon trademark ? 47 | What else include in Pokémon franchise ? 48 | How many seasons in Pokémon animme series ? 
-------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/train.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons 9 | to a snake 10 | Moses' assistant 11 | Egyptian royal court 12 | let his rod turn in to a snake 13 | The Pokémon Company 14 | Nintendo 15 | world's top-selling toy brand, the top-selling trading card game 16 | over 20 seasons 17 | to a snake 18 | Moses' assistant 19 | Egyptian royal court 20 | let his rod turn in to a snake 21 | The Pokémon Company 22 | Nintendo 23 | world's top-selling toy brand, the top-selling trading card game 24 | over 20 seasons 25 | to a snake 26 | Moses' assistant 27 | Egyptian royal court 28 | let his rod turn in to a snake 29 | The Pokémon Company 30 | Nintendo 31 | world's top-selling toy brand, the top-selling trading card game 32 | over 20 seasons 33 | to a snake 34 | Moses' assistant 35 | Egyptian royal court 36 | let his rod turn in to a snake 37 | The Pokémon Company 38 | Nintendo 39 | world's top-selling toy brand, the top-selling trading card game 40 | over 20 seasons 41 | to a snake 42 | Moses' assistant 43 | Egyptian royal court 44 | let his rod turn in to a snake 45 | The Pokémon Company 46 | Nintendo 47 | world's top-selling toy brand, the top-selling trading card game 48 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_finetune.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # Creates the custom knowledge base 5 | python use_own_knowledge_dataset.py 6 | 7 | 8 | # Start a single-node Ray cluster.
9 | ray start --head 10 | 11 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 12 | # run ./examples/rag/finetune_rag_ray.sh --help to see all the possible options 13 | 14 | 15 | 16 | python finetune_rag.py \ 17 | --model_name_or_path facebook/rag-token-base \ 18 | --model_type rag_token \ 19 | --fp16 \ 20 | --gpus 2 \ 21 | --profile \ 22 | --do_train \ 23 | --end2end \ 24 | --do_predict \ 25 | --n_val -1 \ 26 | --train_batch_size 1 \ 27 | --eval_batch_size 1 \ 28 | --max_source_length 128 \ 29 | --max_target_length 25 \ 30 | --val_max_target_length 25 \ 31 | --test_max_target_length 25 \ 32 | --label_smoothing 0.1 \ 33 | --dropout 0.1 \ 34 | --attention_dropout 0.1 \ 35 | --weight_decay 0.001 \ 36 | --adam_epsilon 1e-08 \ 37 | --max_grad_norm 0.1 \ 38 | --lr_scheduler polynomial \ 39 | --learning_rate 3e-05 \ 40 | --num_train_epochs 10 \ 41 | --warmup_steps 500 \ 42 | --gradient_accumulation_steps 1 \ 43 | --distributed_retriever ray \ 44 | --num_retrieval_workers 4 \ 45 | --index_name custom \ 46 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 47 | --index_gpus 1 \ 48 | --gpu_order [6,7,8,9,0,1,2,3,5,4] \ 49 | --indexing_freq 5 50 | 51 | 52 | 53 | # Stop the Ray cluster. 54 | ray stop 55 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_rag_new_features.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="../":"${PYTHONPATH}" 2 | 3 | python use_own_knowledge_dataset.py 4 | 5 | ray start --head 6 | python finetune_rag.py \ 7 | --model_name_or_path facebook/rag-token-base \ 8 | --model_type rag_token \ 9 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 10 | --fp16 \ 11 | --gpus 1 \ 12 | --profile \ 13 | --end2end \ 14 | --index_name custom 15 | 16 | ray stop 17 | -------------------------------------------------------------------------------- /examples/research_projects/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune_rag.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --profile \ 15 | --do_train \ 16 | --do_predict \ 17 | --n_val -1 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 \ 35 | 
-------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag_ray.sh: -------------------------------------------------------------------------------- 1 | # Sample script to finetune RAG using Ray for distributed retrieval. 2 | 3 | # Add parent directory to python path to access lightning_base.py 4 | export PYTHONPATH="../":"${PYTHONPATH}" 5 | 6 | # Start a single-node Ray cluster. 7 | ray start --head 8 | 9 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 10 | # run ./examples/rag/finetune_rag_ray.sh --help to see all the possible options 11 | 12 | python examples/rag/finetune_rag.py \ 13 | --data_dir $DATA_DIR \ 14 | --output_dir $OUTPUT_DIR \ 15 | --model_name_or_path $MODEL_NAME_OR_PATH \ 16 | --model_type rag_sequence \ 17 | --fp16 \ 18 | --gpus 8 \ 19 | --profile \ 20 | --do_train \ 21 | --do_predict \ 22 | --n_val -1 \ 23 | --train_batch_size 8 \ 24 | --eval_batch_size 1 \ 25 | --max_source_length 128 \ 26 | --max_target_length 25 \ 27 | --val_max_target_length 25 \ 28 | --test_max_target_length 25 \ 29 | --label_smoothing 0.1 \ 30 | --dropout 0.1 \ 31 | --attention_dropout 0.1 \ 32 | --weight_decay 0.001 \ 33 | --adam_epsilon 1e-08 \ 34 | --max_grad_norm 0.1 \ 35 | --lr_scheduler polynomial \ 36 | --learning_rate 3e-05 \ 37 | --num_train_epochs 100 \ 38 | --warmup_steps 500 \ 39 | --gradient_accumulation_steps 1 \ 40 | --distributed_retriever ray \ 41 | --num_retrieval_workers 4 42 | 43 | # Stop the Ray cluster. 44 | ray stop 45 | -------------------------------------------------------------------------------- /examples/research_projects/rag/parse_dpr_relevance_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script reads DPR retriever training data and parses each datapoint. We save a line per datapoint. 3 | Each line consists of the query followed by a tab-separated list of Wikipedia page titles constituting 4 | positive contexts for a given query. 
5 | """ 6 | 7 | import argparse 8 | import json 9 | 10 | from tqdm import tqdm 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser() 15 | 16 | # Required parameters 17 | parser.add_argument( 18 | "--src_path", 19 | type=str, 20 | default="biencoder-nq-dev.json", 21 | help="Path to raw DPR training data", 22 | ) 23 | parser.add_argument( 24 | "--evaluation_set", 25 | type=str, 26 | help="where to store parsed evaluation_set file", 27 | ) 28 | parser.add_argument( 29 | "--gold_data_path", 30 | type=str, 31 | help="where to store parsed gold_data_path file", 32 | ) 33 | args = parser.parse_args() 34 | 35 | with open(args.src_path, "r") as src_file, open(args.evaluation_set, "w") as eval_file, open( 36 | args.gold_data_path, "w" 37 | ) as gold_file: 38 | dpr_records = json.load(src_file) 39 | for dpr_record in tqdm(dpr_records): 40 | question = dpr_record["question"] 41 | contexts = [context["title"] for context in dpr_record["positive_ctxs"]] 42 | eval_file.write(question + "\n") 43 | gold_file.write("\t".join(contexts) + "\n") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /examples/research_projects/rag/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | transformers 6 | pytorch-lightning==1.3.1 7 | GitPython -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/_test_make_student.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | from make_student import create_student_by_copying_alternating_layers 5 | from transformers import AutoConfig 6 | from transformers.file_utils import cached_property 7 | from transformers.testing_utils import require_torch 8 | 9 | 10 | TINY_BART = "sshleifer/bart-tiny-random" 11 | TINY_T5 = "patrickvonplaten/t5-tiny-random" 12 | 13 | 14 | @require_torch 15 | class MakeStudentTester(unittest.TestCase): 16 | @cached_property 17 | def teacher_config(self): 18 | return AutoConfig.from_pretrained(TINY_BART) 19 | 20 | def test_valid_t5(self): 21 | student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1) 22 | self.assertEqual(student.config.num_hidden_layers, 1) 23 | 24 | def test_asymmetric_t5(self): 25 | student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None) 26 | 27 | def test_same_decoder_small_encoder(self): 28 | student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None) 29 | self.assertEqual(student.config.encoder_layers, 1) 30 | self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers) 31 | 32 | def test_small_enc_small_dec(self): 33 | student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1) 34 | self.assertEqual(student.config.encoder_layers, 1) 35 | self.assertEqual(student.config.decoder_layers, 1) 36 | 37 | def test_raises_assert(self): 38 | with self.assertRaises(AssertionError): 39 | create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None) 40 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_enro_teacher.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | python finetune.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --do_predict \ 9 | --fp16 \ 10 | --val_check_interval 0.25 \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --freeze_encoder --freeze_embeds \ 14 | --train_batch_size=$BS --eval_batch_size=$BS \ 15 | --tokenizer_name $m --model_name_or_path $m \ 16 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 17 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 --eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_bart_tiny.sh: 
-------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | psutil 4 | sacrebleu 5 | rouge-score 6 | tensorflow_datasets 7 | pytorch-lightning==1.0.4 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with 
FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | x = re.sub("<n>", "", x)  # remove the pegasus newline token "<n>" 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | --val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 --fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3.
\ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/ds_config_wav2vec2_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 2, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "find_unused_parameters": true, 37 | "allgather_partitions": true, 38 | "allgather_bucket_size": 2e8, 39 | "overlap_comm": true, 40 | "reduce_scatter": true, 41 | "reduce_bucket_size": 2e8, 42 | "contiguous_gradients": true 43 | }, 44 | 45 | "gradient_accumulation_steps": "auto", 46 | "gradient_clipping": "auto", 47 | "steps_per_print": 2000, 48 | "train_batch_size": "auto", 49 | "train_micro_batch_size_per_gpu": "auto", 50 | "wall_clock_breakdown": false 51 | } 52 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/ds_config_wav2vec2_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | 
"stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_fp16_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } 58 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="32" \ 6 | --per_device_eval_batch_size="32" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-base" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="20" \ 6 | --per_device_eval_batch_size="20" \ 7 | --evaluation_strategy="steps" \ 8 | --save_steps="500" \ 9 | --eval_steps="100" \ 10 | --logging_steps="50" \ 11 | --learning_rate="5e-4" \ 12 | --warmup_steps="3000" \ 13 | --model_name_or_path="facebook/wav2vec2-base" \ 14 | --fp16 \ 15 | --dataset_name="timit_asr" \ 16 | --train_split_name="train" \ 17 | --validation_split_name="test" \ 18 | --orthography="timit" \ 19 | --preprocessing_num_workers="$(nproc)" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor \ 22 | --verbose_logging \ 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="16" \ 6 | --per_device_eval_batch_size="16" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | 
--output_dir="./wav2vec2-large-lv60-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="2" \ 6 | --per_device_eval_batch_size="2" \ 7 | --gradient_accumulation_steps="4" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="timit_asr" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --orthography="timit" \ 20 | --preprocessing_num_workers="$(nproc)" \ 21 | --group_by_length \ 22 | --freeze_feature_extractor \ 23 | --verbose_logging \ 24 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-xlsr-53-arabic-speech-corpus" \ 4 | --num_train_epochs="50" \ 5 | --per_device_train_batch_size="1" \ 6 | --per_device_eval_batch_size="1" \ 7 | --gradient_accumulation_steps="8" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="elgeish/wav2vec2-large-xlsr-53-arabic" \ 15 | --fp16 \ 16 | --dataset_name="arabic_speech_corpus" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --max_duration_in_seconds="15" \ 20 | --orthography="buckwalter" \ 21 | --preprocessing_num_workers="$(nproc)" \ 22 | --group_by_length \ 23 | --freeze_feature_extractor \ 24 | --target_feature_extractor_sampling_rate \ 25 | --verbose_logging \ 26 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_common_voice.py \ 3 | --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \ 4 | --dataset_config_name="tr" \ 5 | --output_dir=./wav2vec2-large-xlsr-turkish-demo \ 6 | --overwrite_output_dir \ 7 | --num_train_epochs="5" \ 8 | --per_device_train_batch_size="16" \ 9 | --evaluation_strategy="steps" \ 10 | --learning_rate="3e-4" \ 11 | --warmup_steps="500" \ 12 | --fp16 \ 13 | --freeze_feature_extractor \ 14 | --save_steps="400" \ 15 | --eval_steps="400" \ 16 | --save_total_limit="3" \ 17 | --logging_steps="400" \ 18 | --group_by_length \ 19 | --feat_proj_dropout="0.0" \ 20 | --layerdrop="0.1" \ 21 | --gradient_checkpointing \ 22 | --do_train --do_eval 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | torch>=1.5.0 4 | torchaudio 5 | jiwer==2.2.0 6 | lang-trans==0.6.0 7 | librosa==0.8.0 8 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/vocab/buckwalter.json: -------------------------------------------------------------------------------- 1 | { 2 | "<pad>": 0, 3 | "<s>": 1, 4 | "</s>": 2, 5 | "<unk>": 3, 6 | "/": 4, 7 | "'": 5, 8 | "|": 6, 9 | ">": 7, 10 | "&": 8, 11 | "<": 9, 12 | "}": 10, 13 | "A": 11, 14 | "b": 12, 15 | "p": 13, 16 | "t": 14, 17 | "v": 15, 18 | "j": 16, 19 | "H": 17, 20 | "x": 18, 21 | "d": 19, 22 | "*": 20, 23 | "r": 21, 24 | "z": 22, 25 | "s": 23, 26 | "$": 24, 27 | "S": 25, 28 | "D": 26, 29 | "T": 27, 30 | "Z": 28, 31 | "E": 29, 32 | "g": 30, 33 | "_": 31, 34 | "f": 32, 35 | "q": 33, 36 | "k": 34, 37 | "l": 35, 38 | "m": 36, 39 | "n": 37, 40 | "h": 38, 41 | "w": 39, 42 | "Y": 40, 43 | "y": 41, 44 | "F": 42, 45 | "N": 43, 46 | "K": 44, 47 | "a": 45, 48 | "u": 46, 49 | "i": 47, 50 | "~": 48, 51 | "o": 49, 52 | "`": 50, 53 | "{": 51, 54 | "P": 52, 55 | "J": 53, 56 | "V": 54, 57 | "G": 55 58 | } -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # 🤗 Benchmark results 18 | 19 | Here, you can find a list of the different benchmark results created by the community. 20 | 21 | If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add it below. 22 | 23 | | Benchmark description | Results | Environment info | Author | 24 | |:----------|:-------------|:-------------|------:| 25 | | PyTorch Benchmark on inference for `bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 26 | | PyTorch Benchmark on inference for `bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) | 27 | -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple Choice 18 | 19 | ## Fine-tuning on SWAG 20 | 21 | ```bash 22 | export SWAG_DIR=/path/to/swag_data_dir 23 | python ./examples/multiple-choice/run_tf_multiple_choice.py \ 24 | --task_name swag \ 25 | --model_name_or_path bert-base-cased \ 26 | --do_train \ 27 | --do_eval \ 28 | --data_dir $SWAG_DIR \ 29 | --learning_rate 5e-5 \ 30 | --num_train_epochs 3 \ 31 | --max_seq_length 80 \ 32 | --output_dir models_bert/swag_base \ 33 | --per_gpu_eval_batch_size=16 \ 34 | --per_device_train_batch_size=16 \ 35 | --logging_dir logs \ 36 | --gradient_accumulation_steps 2 \ 37 | --overwrite_output 38 | ``` 39 | -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | tensorflow >= 2.3 4 | -------------------------------------------------------------------------------- /examples/tensorflow/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 |
-------------------------------------------------------------------------------- /examples/tensorflow/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /exps/romanian_postprocess.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | ROOT=mosesdecoder 4 | ro_post_process () { 5 | sys=$1 6 | ref=$2 7 | export MOSES_PATH=$ROOT 8 | REPLACE_UNICODE_PUNCT=$MOSES_PATH/scripts/tokenizer/replace-unicode-punctuation.perl 9 | NORM_PUNC=$MOSES_PATH/scripts/tokenizer/normalize-punctuation.perl 10 | REM_NON_PRINT_CHAR=$MOSES_PATH/scripts/tokenizer/remove-non-printing-char.perl 11 | REMOVE_DIACRITICS=$MOSES_PATH/wmt16-scripts/preprocess/remove-diacritics.py 12 | NORMALIZE_ROMANIAN=$MOSES_PATH/wmt16-scripts/preprocess/normalise-romanian.py 13 | TOKENIZER=$MOSES_PATH/scripts/tokenizer/tokenizer.perl 14 | 15 | lang=ro 16 | for file in $sys $ref; do 17 | cat $file \ 18 | | $REPLACE_UNICODE_PUNCT \ 19 | | $NORM_PUNC -l $lang \ 20 | | $REM_NON_PRINT_CHAR \ 21 | | $NORMALIZE_ROMANIAN \ 22 | | $REMOVE_DIACRITICS \ 23 | | $TOKENIZER -no-escape -l $lang \ 24 | > $(basename $file).tok 25 | done 26 | # compute BLEU 27 | cat $(basename $sys).tok | sacrebleu -tok none -s none -b $(basename $ref).tok 28 | } 29 | 30 | 31 | ro_post_process ${1} ${2} -------------------------------------------------------------------------------- /img/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/img/intro.png -------------------------------------------------------------------------------- /model_cards/README.md: -------------------------------------------------------------------------------- 1 | ## 🔥 Model cards now live inside each huggingface.co model repo 🔥 2 | 3 | 4 | For consistency, ease of use and scalability, `README.md` model cards now live directly inside each model repo on the HuggingFace model hub. 5 | 6 | ### How to update a model card 7 | 8 | You can directly update a model card inside any model repo you have **write access** to, i.e.: 9 | - a model under your username namespace 10 | - a model under any organization you are a part of. 11 | 12 | You can either: 13 | - update it, commit and push using your usual git workflow (command line, GUI, etc.; see the minimal sketch at the end of this card) 14 | - or edit it directly from the website's UI. 15 | 16 | **What if you want to create or update a model card for a model you don't have write access to?** 17 | 18 | In that case, given that we don't have a Pull Request system yet on huggingface.co (🤯), 19 | you can open an issue here, post the card's content, and tag the model author(s) and/or the Hugging Face team. 20 | 21 | We might implement a more seamless process at some point, so your early feedback is precious! 22 | Please let us know if you have any suggestions. 23 | 24 | ### What happened to the model cards here? 25 | 26 | We migrated every model card from the repo to its corresponding huggingface.co model repo. Individual commits were preserved, and they link back to the original commit on GitHub. 
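For the git route, here is a minimal command-line sketch. The namespace and repo name are placeholders; substitute a model repo you actually have write access to:

```bash
# Model repos on huggingface.co are regular git repos, so the usual workflow applies
# (placeholder names below).
git clone https://huggingface.co/your-username/your-model
cd your-model

# Edit the card, then commit and push.
$EDITOR README.md
git add README.md
git commit -m "Update model card"
git push
```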
27 | -------------------------------------------------------------------------------- /petl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/petl/__init__.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py35'] 4 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /scripts/pegasus/build_test_sample_spm_no_bos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus 17 | 18 | # 1. pip install sentencepiece 19 | # 20 | # 2. wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt 21 | 22 | # 3. build 23 | import sentencepiece as spm 24 | 25 | # pegasus: 26 | # 1. no bos 27 | # 2. eos_id is 1 28 | # 3. unk_id is 2 29 | # build a sample spm file accordingly 30 | spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=test_sentencepiece_no_bos --bos_id=-1 --unk_id=2 --eos_id=1 --vocab_size=1000') 31 | 32 | # 4. 
now update the fixture 33 | # mv test_sentencepiece_no_bos.model ../../tests/fixtures/ 34 | -------------------------------------------------------------------------------- /scripts/tatoeba/upload_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for FILE in converted/*; do 4 | model_name=`basename $FILE` 5 | transformers-cli repo create $model_name -y 6 | git clone https://huggingface.co/Helsinki-NLP/$model_name 7 | mv $FILE/* $model_name/ 8 | cd $model_name 9 | git add . && git commit -m "initial commit" 10 | git push 11 | cd .. 12 | done 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchaudio 39 | torchtext 40 | torchvision 41 | torch_xla 42 | tqdm 43 | 44 | line_length = 119 45 | lines_after_imports = 2 46 | multi_line_output = 3 47 | use_parentheses = True 48 | 49 | [flake8] 50 | ignore = E203, E501, E741, W503, W605 51 | max-line-length = 119 52 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/src/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /src/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' 
imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .metrics import glue_compute_metrics, xnli_compute_metrics 20 | from .processors import ( 21 | DataProcessor, 22 | InputExample, 23 | InputFeatures, 24 | SingleSentenceClassificationProcessor, 25 | SquadExample, 26 | SquadFeatures, 27 | SquadV1Processor, 28 | SquadV2Processor, 29 | glue_convert_examples_to_features, 30 | glue_output_modes, 31 | glue_processors, 32 | glue_tasks_num_labels, 33 | squad_convert_examples_to_features, 34 | xnli_output_modes, 35 | xnli_processors, 36 | xnli_tasks_num_labels, 37 | ) 38 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import GlueDataset, GlueDataTrainingArguments 20 | from .language_modeling import ( 21 | LineByLineTextDataset, 22 | LineByLineWithRefDataset, 23 | LineByLineWithSOPTextDataset, 24 | TextDataset, 25 | TextDatasetForNextSentencePrediction, 26 | ) 27 | from .squad import SquadDataset, SquadDataTrainingArguments 28 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 20 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 21 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 22 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 23 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_bertweet": ["BertweetTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_bertweet import BertweetTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/byt5/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_byt5": ["ByT5Tokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_byt5 import ByT5Tokenizer 31 | else: 32 | import importlib 33 | import os 34 | import sys 35 | 36 | class _LazyModule(_BaseLazyModule): 37 | """ 38 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 39 | """ 40 | 41 | __file__ = globals()["__file__"] 42 | __path__ = [os.path.dirname(__file__)] 43 | 44 | def _get_module(self, module_name: str): 45 | return importlib.import_module("." + module_name, self.__name__) 46 | 47 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 48 | -------------------------------------------------------------------------------- /src/transformers/models/camembert/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | from ...utils import logging 19 | from ..roberta.configuration_roberta import RobertaConfig 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 25 | "camembert-base": "https://huggingface.co/camembert-base/resolve/main/config.json", 26 | "umberto-commoncrawl-cased-v1": "https://huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/resolve/main/config.json", 27 | "umberto-wikipedia-uncased-v1": "https://huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/resolve/main/config.json", 28 | } 29 | 30 | 31 | class CamembertConfig(RobertaConfig): 32 | """ 33 | This class overrides :class:`~transformers.RobertaConfig`. Please check the superclass for the appropriate 34 | documentation alongside usage examples. 35 | """ 36 | 37 | model_type = "camembert" 38 | -------------------------------------------------------------------------------- /src/transformers/models/cpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_cpm": ["CpmTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_cpm import CpmTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." 
+ module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/src/transformers/models/dialogpt/__init__.py -------------------------------------------------------------------------------- /src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import os 17 | 18 | import torch 19 | 20 | from transformers.file_utils import WEIGHTS_NAME 21 | 22 | 23 | DIALOGPT_MODELS = ["small", "medium", "large"] 24 | 25 | OLD_KEY = "lm_head.decoder.weight" 26 | NEW_KEY = "lm_head.weight" 27 | 28 | 29 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str): 30 | d = torch.load(checkpoint_path) 31 | d[NEW_KEY] = d.pop(OLD_KEY) 32 | os.makedirs(pytorch_dump_folder_path, exist_ok=True) 33 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)) 34 | 35 | 36 | if __name__ == "__main__": 37 | parser = argparse.ArgumentParser() 38 | parser.add_argument("--dialogpt_path", default=".", type=str) 39 | args = parser.parse_args() 40 | for MODEL in DIALOGPT_MODELS: 41 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") 42 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" 43 | convert_dialogpt_checkpoint( 44 | checkpoint_path, 45 | pytorch_dump_folder_path, 46 | ) 47 | -------------------------------------------------------------------------------- /src/transformers/models/mmbt/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """ MMBT configuration """ 17 | 18 | from ...utils import logging 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | 24 | class MMBTConfig(object): 25 | """ 26 | This is the configuration class to store the configuration of a :class:`~transformers.MMBTModel`. It is used to 27 | instantiate an MMBT model according to the specified arguments, defining the model architecture. 28 | 29 | Args: 30 | config (:class:`~transformers.PretrainedConfig`): 31 | Config of the underlying Transformer models. Its values are copied over to use a single config. 32 | num_labels (:obj:`int`, `optional`): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (:obj:`int`, `optional`, defaults to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...file_utils import _BaseLazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_phobert": ["PhobertTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_phobert import PhobertTokenizer 31 | 32 | else: 33 | import importlib 34 | import os 35 | import sys 36 | 37 | class _LazyModule(_BaseLazyModule): 38 | """ 39 | Module class that surfaces all objects but only performs associated imports when the objects are requested. 40 | """ 41 | 42 | __file__ = globals()["__file__"] 43 | __path__ = [os.path.dirname(__file__)] 44 | 45 | def _get_module(self, module_name: str): 46 | return importlib.import_module("." + module_name, self.__name__) 47 | 48 | sys.modules[__name__] = _LazyModule(__name__, _import_structure) 49 | -------------------------------------------------------------------------------- /src/transformers/models/xlm_prophetnet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from ...file_utils import is_sentencepiece_available, is_torch_available 20 | from .configuration_xlm_prophetnet import XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMProphetNetConfig 21 | 22 | 23 | if is_sentencepiece_available(): 24 | from .tokenization_xlm_prophetnet import XLMProphetNetTokenizer 25 | 26 | if is_torch_available(): 27 | from .modeling_xlm_prophetnet import ( 28 | XLM_PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST, 29 | XLMProphetNetDecoder, 30 | XLMProphetNetEncoder, 31 | XLMProphetNetForCausalLM, 32 | XLMProphetNetForConditionalGeneration, 33 | XLMProphetNetModel, 34 | ) 35 | -------------------------------------------------------------------------------- /src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Microsoft Authors and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ XLM-ProphetNet model configuration """ 16 | 17 | 18 | from ...utils import logging 19 | from ..prophetnet.configuration_prophetnet import ProphetNetConfig 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { 25 | "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json", 26 | } 27 | 28 | 29 | class XLMProphetNetConfig(ProphetNetConfig): 30 | """ 31 | This class overrides :class:`~transformers.ProphetNetConfig`. Please check the superclass for the appropriate 32 | documentation alongside usage examples. 33 | """ 34 | 35 | model_type = "xlm-prophetnet" 36 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .trainer_sm import SageMakerTrainer 20 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 21 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import warnings 15 | 16 | from ..trainer import Trainer 17 | from ..utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class SageMakerTrainer(Trainer): 24 | def __init__(self, args=None, **kwargs): 25 | warnings.warn( 26 | "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. You can use `Trainer` " 27 | "instead.", 28 | FutureWarning, 29 | ) 30 | super().__init__(args=args, **kwargs) 31 | -------------------------------------------------------------------------------- /src/transformers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from packaging import version 18 | 19 | from .. 
import __version__ 20 | 21 | 22 | def check_min_version(min_version): 23 | if version.parse(__version__) < version.parse(min_version): 24 | if "dev" in min_version: 25 | error_message = ( 26 | "This example requires a source install from HuggingFace Transformers (see " 27 | "`https://huggingface.co/transformers/installation.html#installing-from-source`)," 28 | ) 29 | else: 30 | error_message = f"This example requires a minimum version of {min_version}," 31 | error_message += f" but the version found is {__version__}.\n" 32 | raise ImportError( 33 | error_message 34 | + ( 35 | "Check out https://huggingface.co/transformers/examples.html for the examples corresponding to other " 36 | "versions of HuggingFace Transformers." 37 | ) 38 | ) 39 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | class Speech2TextProcessor: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["sentencepiece", "speech"]) 8 | 9 | @classmethod 10 | def from_pretrained(cls, *args, **kwargs): 11 | requires_backends(cls, ["sentencepiece", "speech"]) 12 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | SLOW_TO_FAST_CONVERTERS = None 6 | 7 | 8 | def convert_slow_tokenizer(*args, **kwargs): 9 | requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"]) 10 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | class Speech2TextFeatureExtractor: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["speech"]) 8 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_timm_and_vision_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
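# These dummy classes mirror the public DETR API so that `import transformers`
# still works when the optional `timm` and `vision` backends are not installed;
# instantiating any of them (or calling `from_pretrained`) raises an informative
# error via `requires_backends` instead of a bare ImportError.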
2 | from ..file_utils import requires_backends 3 | 4 | 5 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class DetrForObjectDetection: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["timm", "vision"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["timm", "vision"]) 15 | 16 | 17 | class DetrForSegmentation: 18 | def __init__(self, *args, **kwargs): 19 | requires_backends(self, ["timm", "vision"]) 20 | 21 | @classmethod 22 | def from_pretrained(cls, *args, **kwargs): 23 | requires_backends(cls, ["timm", "vision"]) 24 | 25 | 26 | class DetrModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_backends(self, ["timm", "vision"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["timm", "vision"]) 33 | 34 | 35 | class DetrPreTrainedModel: 36 | def __init__(self, *args, **kwargs): 37 | requires_backends(self, ["timm", "vision"]) 38 | 39 | @classmethod 40 | def from_pretrained(cls, *args, **kwargs): 41 | requires_backends(cls, ["timm", "vision"]) 42 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_timm_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..file_utils import requires_backends 3 | 4 | 5 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class DetrForObjectDetection: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["timm"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["timm"]) 15 | 16 | 17 | class DetrForSegmentation: 18 | def __init__(self, *args, **kwargs): 19 | requires_backends(self, ["timm"]) 20 | 21 | @classmethod 22 | def from_pretrained(cls, *args, **kwargs): 23 | requires_backends(cls, ["timm"]) 24 | 25 | 26 | class DetrModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_backends(self, ["timm"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["timm"]) 33 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_vision_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..file_utils import requires_backends 3 | 4 | 5 | class ImageFeatureExtractionMixin: 6 | def __init__(self, *args, **kwargs): 7 | requires_backends(self, ["vision"]) 8 | 9 | 10 | class CLIPFeatureExtractor: 11 | def __init__(self, *args, **kwargs): 12 | requires_backends(self, ["vision"]) 13 | 14 | 15 | class CLIPProcessor: 16 | def __init__(self, *args, **kwargs): 17 | requires_backends(self, ["vision"]) 18 | 19 | @classmethod 20 | def from_pretrained(cls, *args, **kwargs): 21 | requires_backends(cls, ["vision"]) 22 | 23 | 24 | class DeiTFeatureExtractor: 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["vision"]) 27 | 28 | 29 | class DetrFeatureExtractor: 30 | def __init__(self, *args, **kwargs): 31 | requires_backends(self, ["vision"]) 32 | 33 | 34 | class ViTFeatureExtractor: 35 | def __init__(self, *args, **kwargs): 36 | requires_backends(self, ["vision"]) 37 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # How to add a new example script in 🤗 Transformers 18 | 19 | This folder provides a template for adding a new example script implementing a training or inference task with the 20 | models in the 🤗 Transformers library. To use it, you will need to install cookiecutter: 21 | ``` 22 | pip install cookiecutter 23 | ``` 24 | or refer to the installation page of the [cookiecutter documentation](https://cookiecutter.readthedocs.io/). 25 | 26 | You can then run the following command inside the `examples` folder of the transformers repo: 27 | ``` 28 | cookiecutter ../templates/adding_a_new_example_script/ 29 | ``` 30 | and answer the questions asked (a sample session is sketched at the end of this README), which will generate a new 31 | folder where you will find a pre-filled template for your example following the best practices we recommend for them. 32 | 33 | Adjust the way the data is preprocessed, the model is loaded or the Trainer is instantiated. Then, when you're happy, 34 | add a `README.md` in the folder (or complete the existing one if you added a script to an existing folder) telling a 35 | user how to run your script. 36 | 37 | Make a PR to the 🤗 Transformers repo. Don't forget to tweet about your new example with a carbon screenshot of how to 38 | run it and tag @huggingface! 
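As a rough illustration, a run might look like the hypothetical session below. The questions come from this template's `cookiecutter.json` (reproduced right after this README), the answers shown are made-up placeholders, and the exact prompt rendering depends on your cookiecutter version:

```
cookiecutter ../templates/adding_a_new_example_script/
example_name [text classification]: summarization
directory_name [summarization]:
example_shortcut [summarization]:
model_class [AutoModel]: AutoModelForSeq2SeqLM
authors [The HuggingFace Team]: Jane Doe
Select can_train_from_scratch:
1 - True
2 - False
Choose from 1, 2 [1]: 2
Select with_trainer:
1 - True
2 - False
Choose from 1, 2 [1]: 1
```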
39 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"], 8 | "with_trainer": ["True", "False"] 9 | } -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}", 10 | "is_encoder_decoder_model": ["True", "False"] 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"], 9 | "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"], 10 | "is_encoder_decoder_model": ["True", "False"] 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/open_model_proposals/README.md: -------------------------------------------------------------------------------- 1 | Currently the following model proposals are available: 2 | 3 | - [BigBird (Google)](./ADD_BIG_BIRD.md) 4 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": 
"brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "PyTorch", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewENCDEC", 3 | "uppercase_modelname": "NEW_ENC_DEC", 4 | "lowercase_modelname": "new_enc_dec", 5 | "camelcase_modelname": "NewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_and_pytorch": "PyTorch", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | "lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_and_pytorch": "TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewTFENCDEC", 3 | "uppercase_modelname": "NEW_TF_ENC_DEC", 4 | "lowercase_modelname": "new_tf_enc_dec", 5 | "camelcase_modelname": "NewTFEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-tf-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_and_pytorch": "TensorFlow", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/__init__.py -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 
| 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 2, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "allgather_partitions": true, 37 | "allgather_bucket_size": 2e8, 38 | "overlap_comm": true, 39 | "reduce_scatter": true, 40 | "reduce_bucket_size": 2e8, 41 | "contiguous_gradients": true 42 | }, 43 | 44 | "gradient_accumulation_steps": "auto", 45 | "gradient_clipping": "auto", 46 | "steps_per_print": 2000, 47 | "train_batch_size": "auto", 48 | "train_micro_batch_size_per_gpu": "auto", 49 | "wall_clock_breakdown": false 50 | } 51 | -------------------------------------------------------------------------------- /tests/deepspeed/ds_config_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_fp16_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } 58 | -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/dummy_feature_extractor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/empty.txt -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /tests/fixtures/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece_bpe.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | cache* 2 | temp* 3 | !*.txt 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/tests_samples/COCO/000000039769.png -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxhe/unify-parameter-efficient-tuning/3222ce2c0079566a28043e22380eb4ab6ad14389/tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt: -------------------------------------------------------------------------------- 1 | [{"id": 8222595, "category_id": 17, "iscrowd": 0, "bbox": [18, 54, 301, 415], "area": 53306}, {"id": 8225432, "category_id": 17, "iscrowd": 0, "bbox": [349, 26, 291, 343], "area": 59627}, {"id": 8798150, "category_id": 63, "iscrowd": 0, "bbox": [1, 0, 639, 474], "area": 174579}, {"id": 14466198, "category_id": 75, "iscrowd": 0, "bbox": [42, 74, 133, 45], "area": 4068}, {"id": 12821912, "category_id": 75, "iscrowd": 0, "bbox": [333, 80, 38, 106], "area": 2118}, {"id": 10898909, "category_id": 93, "iscrowd": 0, "bbox": [0, 
0, 640, 480], "area": 2750}] -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 
7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.csv: -------------------------------------------------------------------------------- 1 | label,sentence1,sentence2 2 | equivalent,He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .,""" The foodservice pie business does not fit our long-term growth strategy ." 3 | not_equivalent,Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .,"His wife said he was "" 100 percent behind George Bush "" and looked forward to using his years of training in the war ." 4 | not_equivalent,"The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .","The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent ." 5 | equivalent,The AFL-CIO is waiting until October to decide if it will endorse a candidate .,The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | not_equivalent,No dates have been set for the civil or the criminal trial .,"No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty ." 7 | equivalent,Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .,It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 
8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 
2.200 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/wmt16/sample.json: -------------------------------------------------------------------------------- 1 | {"translation": {"en": "Membership of Parliament: see Minutes", "ro": "Componenţa Parlamentului: a se vedea procesul-verbal"}} 2 | {"translation": {"en": "Approval of Minutes of previous sitting: see Minutes", "ro": "Aprobarea procesului-verbal al şedinţei precedente: a se vedea procesul-verbal"}} 3 | {"translation": {"en": "Membership of Parliament: see Minutes", "ro": "Componenţa Parlamentului: a se vedea procesul-verbal"}} 4 | {"translation": {"en": "Verification of credentials: see Minutes", "ro": "Verificarea prerogativelor: a se vedea procesul-verbal"}} 5 | {"translation": {"en": "Documents received: see Minutes", "ro": "Depunere de documente: a se vedea procesul-verbal"}} 6 | {"translation": {"en": "Written statements and oral questions (tabling): see Minutes", "ro": "Declaraţii scrise şi întrebări orale (depunere): consultaţi procesul-verbal"}} 7 | {"translation": {"en": "Petitions: see Minutes", "ro": "Petiţii: a se vedea procesul-verbal"}} 8 | {"translation": {"en": "Texts of agreements forwarded by the Council: see Minutes", "ro": "Transmiterea de către Consiliu a textelor acordurilor: a se vedea procesul-verbal"}} 9 | {"translation": {"en": "Action taken on Parliament's resolutions: see Minutes", "ro": "Cursul dat rezoluţiilor Parlamentului: a se vedea procesul-verbal"}} 10 | {"translation": {"en": "Agenda for next sitting: see Minutes", "ro": "Ordinea de zi a următoarei şedinţe: a se vedea procesul-verbal"}} 11 | -------------------------------------------------------------------------------- /tests/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def is_sagemaker_available(): 5 | return importlib.util.find_spec("sagemaker") is not None 6 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@master # install from master, or replace master with vX.X.X to install a specific transformers version -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/run_ddp.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import subprocess 5 | from argparse import ArgumentParser 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def parse_args(): 12 | parser = ArgumentParser() 13 | parsed, unknown = parser.parse_known_args() 14 | for arg in unknown: 15 | if arg.startswith(("-", "--")): 16 | parser.add_argument(arg.split("=")[0]) 17 | 18 | return parser.parse_args() 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | port = 8888 24 | num_gpus = int(os.environ["SM_NUM_GPUS"]) 25 | hosts = json.loads(os.environ["SM_HOSTS"]) 26 | num_nodes = len(hosts) 27 | current_host = os.environ["SM_CURRENT_HOST"] 28 | rank = hosts.index(current_host) 29 | os.environ["NCCL_DEBUG"] = "INFO" 30 | 31 | if num_nodes > 1: 32 | cmd = f"""python -m torch.distributed.launch \ 33 | --nnodes={num_nodes} \ 34 | --node_rank={rank} \ 35 | --nproc_per_node={num_gpus} \ 36 | --master_addr={hosts[0]} \ 37 | --master_port={port} \ 38 | ./run_glue.py \ 39 | 
{"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" 40 | else: 41 | cmd = f"""python -m torch.distributed.launch \ 42 | --nproc_per_node={num_gpus} \ 43 | ./run_glue.py \ 44 | {"".join([f" --{parameter} {value}" for parameter,value in args.__dict__.items()])}""" 45 | try: 46 | subprocess.run(cmd, shell=True) 47 | except Exception as e: 48 | logger.info(e) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/tensorflow/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@master # install master or adjust ist with vX.X.X for installing version specific transforms -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from transformers import is_torch_available 18 | from transformers.testing_utils import require_torch 19 | 20 | 21 | if is_torch_available(): 22 | import torch 23 | 24 | from transformers.activations import _gelu_python, gelu_new, get_activation 25 | 26 | 27 | @require_torch 28 | class TestActivations(unittest.TestCase): 29 | def test_gelu_versions(self): 30 | x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) 31 | torch_builtin = get_activation("gelu") 32 | self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item()) 33 | self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item()) 34 | 35 | def test_get_activation(self): 36 | get_activation("swish") 37 | get_activation("silu") 38 | get_activation("relu") 39 | get_activation("tanh") 40 | get_activation("gelu_new") 41 | get_activation("gelu_fast") 42 | with self.assertRaises(KeyError): 43 | get_activation("bogus") 44 | with self.assertRaises(KeyError): 45 | get_activation(None) 46 | -------------------------------------------------------------------------------- /tests/test_activations_tf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | from transformers import is_tf_available 18 | from transformers.testing_utils import require_tf 19 | 20 | 21 | if is_tf_available(): 22 | from transformers.activations_tf import get_tf_activation 23 | 24 | 25 | @require_tf 26 | class TestTFActivations(unittest.TestCase): 27 | def test_get_activation(self): 28 | get_tf_activation("swish") 29 | get_tf_activation("silu") 30 | get_tf_activation("gelu") 31 | get_tf_activation("relu") 32 | get_tf_activation("tanh") 33 | get_tf_activation("gelu_new") 34 | get_tf_activation("gelu_fast") 35 | get_tf_activation("mish") 36 | with self.assertRaises(KeyError): 37 | get_tf_activation("bogus") 38 | with self.assertRaises(KeyError): 39 | get_tf_activation(None) 40 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | from unittest.mock import patch 18 | 19 | from transformers.testing_utils import CaptureStd 20 | 21 | 22 | class CLITest(unittest.TestCase): 23 | @patch("sys.argv", ["fakeprogrampath", "env"]) 24 | def test_cli_env(self): 25 | # test transformers-cli env 26 | import transformers.commands.transformers_cli 27 | 28 | with CaptureStd() as cs: 29 | transformers.commands.transformers_cli.main() 30 | assert "Python version" in cs.out 31 | assert "Platform" in cs.out 32 | assert "Using distributed or parallel set-up in script?" in cs.out 33 | -------------------------------------------------------------------------------- /tests/test_feature_extraction_auto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
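# The three tests below cover AutoFeatureExtractor's lookup paths: a hub model id, a local directory containing the config, and a direct path to a config file.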
15 | 16 | import os 17 | import unittest 18 | 19 | from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor 20 | 21 | 22 | SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures") 23 | SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join( 24 | os.path.dirname(os.path.abspath(__file__)), "fixtures/dummy_feature_extractor_config.json" 25 | ) 26 | 27 | 28 | class AutoFeatureExtractorTest(unittest.TestCase): 29 | def test_feature_extractor_from_model_shortcut(self): 30 | config = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h") 31 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 32 | 33 | def test_feature_extractor_from_local_directory(self): 34 | config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) 35 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 36 | 37 | def test_feature_extractor_from_local_file(self): 38 | config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG) 39 | self.assertIsInstance(config, Wav2Vec2FeatureExtractor) 40 | -------------------------------------------------------------------------------- /tests/test_pipelines_feature_extraction.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class FeatureExtractionPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "feature-extraction" 22 | small_models = [ 23 | "sshleifer/tiny-distilbert-base-cased" 24 | ] # Default model - Models tested without the @slow decorator 25 | large_models = [None] # Models tested with the @slow decorator 26 | mandatory_keys = {} # Keys which should be in the output 27 | -------------------------------------------------------------------------------- /tests/test_pipelines_text2text_generation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
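# MonoInputPipelineCommonMixin supplies the shared single-input pipeline tests; this class only declares the task name, the checkpoints to exercise, and the expected output keys.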
14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class Text2TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "text2text-generation" 22 | small_models = ["patrickvonplaten/t5-tiny-random"] # Default model - Models tested without the @slow decorator 23 | large_models = [] # Models tested with the @slow decorator 24 | invalid_inputs = [4, ""] 25 | mandatory_keys = ["generated_text"] 26 | -------------------------------------------------------------------------------- /tests/test_pipelines_text_classification.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from .test_pipelines_common import MonoInputPipelineCommonMixin 18 | 19 | 20 | class TextClassificationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): 21 | pipeline_task = "sentiment-analysis" 22 | small_models = [ 23 | "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english" 24 | ] # Default model - Models tested without the @slow decorator 25 | large_models = [None] # Models tested with the @slow decorator 26 | mandatory_keys = {"label", "score"} # Keys which should be in the output 27 | -------------------------------------------------------------------------------- /tests/test_tokenization_blenderbot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | # Copyright 2020 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Tests for Blenderbot Tokenizers, including common tests for BlenderbotSmallTokenizer.""" 17 | import unittest 18 | 19 | from transformers.file_utils import cached_property 20 | from transformers.models.blenderbot.tokenization_blenderbot import BlenderbotTokenizer 21 | 22 | 23 | class Blenderbot3BTokenizerTests(unittest.TestCase): 24 | @cached_property 25 | def tokenizer_3b(self): 26 | return BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B") 27 | 28 | def test_encode_decode_cycle(self): 29 | tok = self.tokenizer_3b 30 | src_text = " I am a small frog." 
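# Round-trip check: encoding and then decoding (with special tokens stripped and no space cleanup) should reproduce src_text exactly, leading space included.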
31 | encoded = tok([src_text], padding=False, truncation=False)["input_ids"] 32 | decoded = tok.batch_decode(encoded, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] 33 | assert src_text == decoded 34 | 35 | def test_3B_tokenization_same_as_parlai(self): 36 | assert self.tokenizer_3b.add_prefix_space 37 | assert self.tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]] 38 | -------------------------------------------------------------------------------- /tests/test_tokenization_cpm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc. team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from transformers.models.cpm.tokenization_cpm import CpmTokenizer 17 | from transformers.testing_utils import custom_tokenizers 18 | 19 | from .test_modeling_xlnet import XLNetModelTest 20 | 21 | 22 | @custom_tokenizers 23 | class CpmTokenizationTest(XLNetModelTest): 24 | def test_pre_tokenization(self): 25 | tokenizer = CpmTokenizer.from_pretrained("TsinghuaAI/CPM-Generate") 26 | text = "Hugging Face大法好,谁用谁知道。" 27 | normalized_text = "Hugging Face大法好,谁用谁知道。" 28 | bpe_tokens = "▁Hu gg ing ▁ ▂ ▁F ace ▁大法 ▁好 ▁ , ▁谁 ▁用 ▁谁 ▁知 道 ▁ 。".split() 29 | 30 | tokens = tokenizer.tokenize(text) 31 | self.assertListEqual(tokens, bpe_tokens) 32 | 33 | input_tokens = tokens + [tokenizer.unk_token] 34 | 35 | input_bpe_tokens = [13789, 13283, 1421, 8, 10, 1164, 13608, 16528, 63, 8, 9, 440, 108, 440, 121, 90, 8, 12, 0] 36 | self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) 37 | 38 | reconstructed_text = tokenizer.decode(input_bpe_tokens) 39 | self.assertEqual(reconstructed_text, normalized_text) 40 | -------------------------------------------------------------------------------- /tests/test_tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
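# DistilBERT shares BERT's tokenizer, so this suite inherits the full BertTokenizationTest battery and only adds a DistilBERT-specific sequence-builder check.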
15 | 16 | 17 | from transformers import DistilBertTokenizer, DistilBertTokenizerFast 18 | from transformers.testing_utils import require_tokenizers, slow 19 | 20 | from .test_tokenization_bert import BertTokenizationTest 21 | 22 | 23 | @require_tokenizers 24 | class DistilBertTokenizationTest(BertTokenizationTest): 25 | 26 | tokenizer_class = DistilBertTokenizer 27 | rust_tokenizer_class = DistilBertTokenizerFast 28 | test_rust_tokenizer = True 29 | 30 | @slow 31 | def test_sequence_builders(self): 32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 33 | 34 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 36 | 37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 39 | 40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 42 | tokenizer.sep_token_id 43 | ] 44 | -------------------------------------------------------------------------------- /tride/controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def override(hidden_states, override): 5 | """Change the hidden states according to 6 | the arguments specified in the override dict. 7 | 8 | Args: 9 | 10 | hidden_states (tensor): (B, T, H) 11 | override (dict): expects 'alpha' (float) and 'model' (a fitted sklearn linear classifier) 12 | """ 13 | 14 | alpha = override['alpha'] 15 | sklearn_model = override['model'] 16 | index = sklearn_model.coef_[0].nonzero()[0] # only the hidden dims the probe actually uses 17 | w = sklearn_model.coef_[0][index] 18 | b = sklearn_model.intercept_[0] 19 | 20 | hidden_states_cpu = hidden_states.cpu().numpy() 21 | 22 | x = hidden_states_cpu[:,:,index] 23 | 24 | w_expand = np.expand_dims(w, axis=(0,1)) 25 | 26 | project_x = x - np.expand_dims((np.dot(x, w) + b) / np.sqrt(np.dot(w, w)), axis=-1) * w_expand # the (B, T) distances need a trailing axis to broadcast against w_expand 27 | 28 | final_x = project_x + alpha * w_expand # shift along the (unnormalized) probe direction 29 | 30 | hidden_states_cpu[:,:,index] = final_x 31 | 32 | return hidden_states.new_tensor(hidden_states_cpu) 33 | -------------------------------------------------------------------------------- /tride/openai_sentiment_neuron/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /tride/openai_sentiment_neuron/sst_binary_demo.py: -------------------------------------------------------------------------------- 1 | from encoder import Model 2 | from matplotlib import pyplot as plt 3 | from utils import sst_binary, train_with_reg_cv 4 | 5 | model = Model() 6 | 7 | trX, vaX, teX, trY, vaY, teY = sst_binary() 8 | trXt = model.transform(trX) 9 | vaXt = model.transform(vaX) 10 | teXt = model.transform(teX) 11 | 12 | # classification results 13 | full_rep_acc, c, nnotzero = train_with_reg_cv(trXt, trY, vaXt, vaY, teXt, teY) 14 | print('%05.2f test accuracy'%full_rep_acc) 15 | print('%05.2f regularization coef'%c) 16 | print('%05d features used'%nnotzero) 17 | 18 | # visualize sentiment unit 19 | sentiment_unit = trXt[:, 2388] 20 | plt.hist(sentiment_unit[trY==0], bins=25, alpha=0.5, label='neg') 21 | plt.hist(sentiment_unit[trY==1], bins=25, alpha=0.5, label='pos') 22 | plt.legend() 23 | plt.show() 24 | -------------------------------------------------------------------------------- /tride/scripts/generate_text.py: 
-------------------------------------------------------------------------------- 1 | """sample text from a pretrained language model 2 | """ 3 | 4 | import argparse 5 | import torch 6 | from transformers import AutoModelForCausalLM, PreTrainedTokenizerFast 7 | 8 | parser = argparse.ArgumentParser() 9 | # parser.add_argument('--prompt', type=str, default='', 10 | # help='the prompt to start with') 11 | parser.add_argument('--model', type=str, default='gpt2-large', 12 | help='the pretrained model name') 13 | 14 | args = parser.parse_args() 15 | 16 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 17 | 18 | tokenizer = PreTrainedTokenizerFast.from_pretrained(args.model) 19 | model = AutoModelForCausalLM.from_pretrained(args.model) 20 | 21 | model.to(device) 22 | model.eval() 23 | 24 | prompt="In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English." 25 | 26 | # encode input context 27 | input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device) 28 | 29 | outputs = model.generate(input_ids=None if prompt=='' else input_ids, do_sample=True, max_length=512, top_k=50) 30 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 31 | -------------------------------------------------------------------------------- /tride/scripts/sklearn_lr_sst.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from openai_sentiment_neuron import sst_binary, train_with_reg 4 | 5 | 6 | def read_input(keys): 7 | def parse_fname(fname): # feature files are named like prefix.size{N}.hid{H}.ext 8 | x = '.'.join(fname.split('.')[:-1]) 9 | x = x.split('/')[-1] 10 | x = x.split('.') 11 | 12 | size = int(x[-2].split('size')[-1]) 13 | embed = int(x[-1].split('hid')[-1]) 14 | 15 | return size, embed 16 | 17 | size, embed = parse_fname(keys) 18 | keys = np.memmap(keys, 19 | dtype=np.float32, 20 | mode='r', 21 | shape=(size, embed)) 22 | 23 | return keys 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--data', type=str, default='openai_sentiment_neuron/data', 27 | help='the data directory which consists of csv files') 28 | parser.add_argument('--train', type=str, 29 | help='path to the memmap file of training-set features (named like prefix.size{N}.hid{H}.ext)') 30 | parser.add_argument('--val', type=str, 31 | help='path to the memmap file of validation-set features (same naming scheme)') 32 | parser.add_argument('--test', type=str, 33 | help='path to the memmap file of test-set features (same naming scheme)') 34 | parser.add_argument('--c', type=float, default=1, 35 | help='inverse of the regularization strength') 36 | 37 | args = parser.parse_args() 38 | 39 | trX, vaX, teX, trY, vaY, teY = sst_binary(args.data) 40 | 41 | trXt = read_input(args.train) 42 | vaXt = read_input(args.val) 43 | teXt = read_input(args.test) 44 | 45 | # classification results 46 | full_rep_acc, c, nnotzero, model = train_with_reg(trXt, trY, vaXt, vaY, teXt, teY, c=args.c, verbose=1) 47 | print('%05.2f test accuracy'%full_rep_acc) 48 | print('%05.2f regularization coef'%c) 49 | print('%05d features used'%nnotzero) 50 | -------------------------------------------------------------------------------- /utils/get_modified_files.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.: 17 | # python ./utils/get_modified_files.py utils src tests examples 18 | # 19 | # it uses git to find the forking point and which files were modified - i.e. files not under git won't be considered 20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results 21 | 22 | import re 23 | import subprocess 24 | import sys 25 | 26 | 27 | fork_point_sha = subprocess.check_output("git merge-base master HEAD".split()).decode("utf-8") 28 | modified_files = subprocess.check_output(f"git diff --name-only {fork_point_sha}".split()).decode("utf-8").split() 29 | 30 | joined_dirs = "|".join(sys.argv[1:]) 31 | regex = re.compile(fr"^({joined_dirs}).*?\.py$") 32 | 33 | relevant_modified_files = [x for x in modified_files if regex.match(x)] 34 | print(" ".join(relevant_modified_files), end="") 35 | --------------------------------------------------------------------------------
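As a closing note, here is a minimal sketch of how the override hook in tride/controller.py could be driven on its own. The probe, the alpha value, and the tensor shapes below are illustrative stand-ins rather than values taken from the repository; any fitted scikit-learn linear classifier exposing coef_ and intercept_ should work.

import numpy as np
import torch
from sklearn.linear_model import LogisticRegression

from tride.controller import override

# Fit a toy linear probe on synthetic "hidden states": (num_examples, hidden_size).
hidden_size = 16
rng = np.random.RandomState(0)
features = rng.randn(64, hidden_size)
labels = np.array([0, 1] * 32)  # deterministic labels so both classes are present
probe = LogisticRegression(max_iter=200).fit(features, labels)

# Hidden states as they would appear inside a transformer layer: (B, T, H).
hidden = torch.randn(2, 5, hidden_size)

# Project every position onto the probe's decision boundary, then shift it
# by alpha along the probe direction (larger alpha means a stronger push).
steered = override(hidden, {"model": probe, "alpha": 2.0})
print(steered.shape)  # torch.Size([2, 5, 16])

Passing the probe and the strength in a plain dict keeps the hook generic: the same forward pass can be steered by a different linear probe simply by swapping the 'model' entry, which is presumably why the repository stores both alongside the hidden states rather than baking either into the model.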