├── tests
│   ├── __init__.py
│   ├── onnx/__init__.py
│   ├── utils/__init__.py
│   ├── benchmark/__init__.py
│   ├── fixtures
│   │   ├── empty.txt
│   │   ├── dummy-config.json
│   │   ├── merges.txt
│   │   ├── input.txt
│   │   ├── spiece.model
│   │   ├── test_entity_vocab.json
│   │   ├── tests_samples
│   │   │   ├── .gitignore
│   │   │   ├── COCO
│   │   │   │   ├── 000000039769.png
│   │   │   │   ├── coco_panoptic/000000039769.png
│   │   │   │   └── coco_panoptic_annotations.txt
│   │   │   ├── GermEval/labels.txt
│   │   │   └── STS-B
│   │   │   │   ├── train.tsv
│   │   │   │   └── dev.tsv
│   │   ├── test_sentencepiece.model
│   │   ├── test_sentencepiece_bpe.model
│   │   ├── test_sentencepiece_no_bos.model
│   │   ├── preprocessor_config.json
│   │   ├── test_sentencepiece_bpe_char.model
│   │   ├── vocab.txt
│   │   ├── dummy_feature_extractor_config.json
│   │   ├── test_sentencepiece_with_bytefallback.model
│   │   ├── vocab.json
│   │   └── add_distilbert_like_config.json
│   ├── generation/__init__.py
│   ├── mixed_int8/__init__.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── bit/__init__.py
│   │   ├── cpm/__init__.py
│   │   ├── cvt/__init__.py
│   │   ├── dit/__init__.py
│   │   ├── dpr/__init__.py
│   │   ├── dpt/__init__.py
│   │   ├── esm/__init__.py
│   │   ├── git/__init__.py
│   │   ├── led/__init__.py
│   │   ├── mt5/__init__.py
│   │   ├── mvp/__init__.py
│   │   ├── nat/__init__.py
│   │   ├── opt/__init__.py
│   │   ├── rag/__init__.py
│   │   ├── sew/__init__.py
│   │   ├── t5/__init__.py
│   │   ├── van/__init__.py
│   │   ├── vit/__init__.py
│   │   ├── xlm/__init__.py
│   │   ├── albert/__init__.py
│   │   ├── align/__init__.py
│   │   ├── altclip/__init__.py
│   │   ├── auto/__init__.py
│   │   ├── bart/__init__.py
│   │   ├── barthez/__init__.py
│   │   ├── bartpho/__init__.py
│   │   ├── beit/__init__.py
│   │   ├── bert/__init__.py
│   │   ├── bertweet/__init__.py
│   │   ├── big_bird/__init__.py
│   │   ├── biogpt/__init__.py
│   │   ├── blip/__init__.py
│   │   ├── blip_2/__init__.py
│   │   ├── bloom/__init__.py
│   │   ├── bort/__init__.py
│   │   ├── byt5/__init__.py
│   │   ├── canine/__init__.py
│   │   ├── clap/__init__.py
│   │   ├── clip/__init__.py
│   │   ├── clipseg/__init__.py
│   │   ├── codegen/__init__.py
│   │   ├── convbert/__init__.py
│   │   ├── convnext/__init__.py
│   │   ├── ctrl/__init__.py
│   │   ├── data2vec/__init__.py
│   │   ├── deberta/__init__.py
│   │   ├── deit/__init__.py
│   │   ├── deta/__init__.py
│   │   ├── detr/__init__.py
│   │   ├── dinat/__init__.py
│   │   ├── donut/__init__.py
│   │   ├── electra/__init__.py
│   │   ├── ernie/__init__.py
│   │   ├── ernie_m/__init__.py
│   │   ├── flaubert/__init__.py
│   │   ├── flava/__init__.py
│   │   ├── fnet/__init__.py
│   │   ├── fsmt/__init__.py
│   │   ├── funnel/__init__.py
│   │   ├── glpn/__init__.py
│   │   ├── gpt2/__init__.py
│   │   ├── gpt_neo/__init__.py
│   │   ├── gpt_neox/__init__.py
│   │   ├── gpt_sw3/__init__.py
│   │   ├── gptj/__init__.py
│   │   ├── groupvit/__init__.py
│   │   ├── herbert/__init__.py
│   │   ├── hubert/__init__.py
│   │   ├── ibert/__init__.py
│   │   ├── imagegpt/__init__.py
│   │   ├── jukebox/__init__.py
│   │   ├── layoutlm/__init__.py
│   │   ├── levit/__init__.py
│   │   ├── lilt/__init__.py
│   │   ├── longt5/__init__.py
│   │   ├── luke/__init__.py
│   │   ├── lxmert/__init__.py
│   │   ├── m2m_100/__init__.py
│   │   ├── marian/__init__.py
│   │   ├── markuplm/__init__.py
│   │   ├── mbart/__init__.py
│   │   ├── mbart50/__init__.py
│   │   ├── mctct/__init__.py
│   │   ├── mluke/__init__.py
│   │   ├── mpnet/__init__.py
│   │   ├── nezha/__init__.py
│   │   ├── nllb/__init__.py
│   │   ├── openai/__init__.py
│   │   ├── owlvit/__init__.py
│   │   ├── pegasus/__init__.py
│   │   ├── phobert/__init__.py
│   │   ├── plbart/__init__.py
│   │   ├── qdqbert/__init__.py
│   │   ├── realm/__init__.py
│   │   ├── reformer/__init__.py
│   │   ├── regnet/__init__.py
│   │   ├── rembert/__init__.py
│   │   ├── resnet/__init__.py
│   │   ├── roberta/__init__.py
│   │   ├── roc_bert/__init__.py
│   │   ├── roformer/__init__.py
│   │   ├── sew_d/__init__.py
│   │   ├── speecht5/__init__.py
│   │   ├── splinter/__init__.py
│   │   ├── swin/__init__.py
│   │   ├── swin2sr/__init__.py
│   │   ├── swinv2/__init__.py
│   │   ├── tapas/__init__.py
│   │   ├── tapex/__init__.py
│   │   ├── trocr/__init__.py
│   │   ├── tvlt/__init__.py
│   │   ├── upernet/__init__.py
│   │   ├── videomae/__init__.py
│   │   ├── vilt/__init__.py
│   │   ├── vit_mae/__init__.py
│   │   ├── vit_msn/__init__.py
│   │   ├── wav2vec2/__init__.py
│   │   ├── wavlm/__init__.py
│   │   ├── whisper/__init__.py
│   │   ├── x_clip/__init__.py
│   │   ├── xglm/__init__.py
│   │   ├── xlnet/__init__.py
│   │   ├── xmod/__init__.py
│   │   ├── yolos/__init__.py
│   │   ├── yoso/__init__.py
│   │   ├── bert_japanese/__init__.py
│   │   ├── blenderbot/__init__.py
│   │   ├── bridgetower/__init__.py
│   │   ├── camembert/__init__.py
│   │   ├── chinese_clip/__init__.py
│   │   ├── deberta_v2/__init__.py
│   │   ├── distilbert/__init__.py
│   │   ├── efficientnet/__init__.py
│   │   ├── graphormer/__init__.py
│   │   ├── layoutlmv2/__init__.py
│   │   ├── layoutlmv3/__init__.py
│   │   ├── layoutxlm/__init__.py
│   │   ├── longformer/__init__.py
│   │   ├── mask2former/__init__.py
│   │   ├── maskformer/__init__.py
│   │   ├── megatron_bert/__init__.py
│   │   ├── megatron_gpt2/__init__.py
│   │   ├── mobilebert/__init__.py
│   │   ├── mobilenet_v1/__init__.py
│   │   ├── mobilenet_v2/__init__.py
│   │   ├── mobilevit/__init__.py
│   │   ├── nystromformer/__init__.py
│   │   ├── oneformer/__init__.py
│   │   ├── pegasus_x/__init__.py
│   │   ├── perceiver/__init__.py
│   │   ├── poolformer/__init__.py
│   │   ├── prophetnet/__init__.py
│   │   ├── retribert/__init__.py
│   │   ├── segformer/__init__.py
│   │   ├── squeezebert/__init__.py
│   │   ├── timesformer/__init__.py
│   │   ├── transfo_xl/__init__.py
│   │   ├── unispeech/__init__.py
│   │   ├── unispeech_sat/__init__.py
│   │   ├── visual_bert/__init__.py
│   │   ├── vit_hybrid/__init__.py
│   │   ├── xlm_roberta/__init__.py
│   │   ├── bert_generation/__init__.py
│   │   ├── bigbird_pegasus/__init__.py
│   │   ├── blenderbot_small/__init__.py
│   │   ├── conditional_detr/__init__.py
│   │   ├── deformable_detr/__init__.py
│   │   ├── efficientformer/__init__.py
│   │   ├── encoder_decoder/__init__.py
│   │   ├── gpt_neox_japanese/__init__.py
│   │   ├── gptsan_japanese/__init__.py
│   │   ├── speech_to_text/__init__.py
│   │   ├── speech_to_text_2/__init__.py
│   │   ├── table_transformer/__init__.py
│   │   ├── wav2vec2_conformer/__init__.py
│   │   ├── wav2vec2_phoneme/__init__.py
│   │   ├── wav2vec2_with_lm/__init__.py
│   │   ├── xlm_prophetnet/__init__.py
│   │   ├── xlm_roberta_xl/__init__.py
│   │   ├── decision_transformer/__init__.py
│   │   ├── roberta_prelayernorm/__init__.py
│   │   ├── speech_encoder_decoder/__init__.py
│   │   ├── switch_transformers/__init__.py
│   │   ├── time_series_transformer/__init__.py
│   │   ├── trajectory_transformer/__init__.py
│   │   ├── vision_encoder_decoder/__init__.py
│   │   ├── vision_text_dual_encoder/__init__.py
│   │   └── audio_spectrogram_transformer/__init__.py
│   ├── pipelines/__init__.py
│   ├── trainer/__init__.py
│   ├── optimization/__init__.py
│   ├── tokenization/__init__.py
│   ├── deepspeed/vit_feature_extractor.json
│   └── sagemaker
│   │   ├── __init__.py
│   │   └── scripts
│   │   │   ├── tensorflow/requirements.txt
│   │   │   └── pytorch/requirements.txt
├── MANIFEST.in
├── utils
│   └── test_module
│   │   ├── __init__.py
│   │   ├── custom_tokenization.py
│   │   ├── custom_image_processing.py
│   │   ├── custom_feature_extraction.py
│   │   ├── custom_processing.py
│   │   ├── custom_tokenization_fast.py
│   │   ├── custom_configuration.py
│   │   ├── custom_modeling.py
│   │   └── custom_pipeline.py
├── src
│   └── transformers
│   │   ├── benchmark/__init__.py
│   │   ├── models
│   │   │   ├── bort/__init__.py
│   │   │   ├── dit/__init__.py
│   │   │   ├── dialogpt/__init__.py
│   │   │   ├── yoso
│   │   │   │   ├── common_cuda.h
│   │   │   │   └── common.h
│   │   │   ├── esm/openfold_utils/__init__.py
│   │   │   ├── gpt2/CONVERSION.md
│   │   │   ├── megatron_gpt2/__init__.py
│   │   │   ├── deformable_detr/custom_kernel
│   │   │   │   ├── vision.cpp
│   │   │   │   ├── cuda/ms_deform_attn_cuda.h
│   │   │   │   └── cpu/ms_deform_attn_cpu.h
│   │   │   ├── tapex/__init__.py
│   │   │   ├── byt5/__init__.py
│   │   │   ├── phobert/__init__.py
│   │   │   ├── bertweet/__init__.py
│   │   │   ├── wav2vec2_with_lm/__init__.py
│   │   │   ├── wav2vec2_phoneme/__init__.py
│   │   │   ├── bert_japanese/__init__.py
│   │   │   ├── dpt/feature_extraction_dpt.py
│   │   │   ├── vit/feature_extraction_vit.py
│   │   │   ├── beit/feature_extraction_beit.py
│   │   │   ├── clip/feature_extraction_clip.py
│   │   │   ├── deit/feature_extraction_deit.py
│   │   │   └── detr/feature_extraction_detr.py
│   │   ├── utils
│   │   │   ├── constants.py
│   │   │   ├── dummy_keras_nlp_objects.py
│   │   │   ├── dummy_sentencepiece_and_tokenizers_objects.py
│   │   │   ├── dummy_tensorflow_text_objects.py
│   │   │   ├── dummy_detectron2_objects.py
│   │   │   └── dummy_speech_objects.py
│   │   ├── sagemaker
│   │   │   ├── __init__.py
│   │   │   └── trainer_sm.py
│   │   ├── data
│   │   │   ├── datasets/__init__.py
│   │   │   └── processors/__init__.py
│   │   ├── commands/__init__.py
│   │   ├── generation_tf_utils.py
│   │   ├── generation_flax_utils.py
│   │   └── generation_utils.py
├── examples
│   ├── research_projects
│   │   ├── bertabs
│   │   │   ├── __init__.py
│   │   │   └── requirements.txt
│   │   ├── deebert
│   │   │   ├── src/__init__.py
│   │   │   ├── requirements.txt
│   │   │   ├── eval_deebert.sh
│   │   │   ├── entropy_eval.sh
│   │   │   └── train_deebert.sh
│   │   ├── codeparrot
│   │   │   ├── scripts
│   │   │   │   ├── tests
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── test_deduplicate.py
│   │   │   │   ├── initialize_model.py
│   │   │   │   └── bpe_training.py
│   │   │   ├── examples/requirements.txt
│   │   │   └── requirements.txt
│   │   ├── bert-loses-patience
│   │   │   ├── pabee/__init__.py
│   │   │   └── requirements.txt
│   │   ├── fsner
│   │   │   ├── requirements.txt
│   │   │   ├── src/fsner/__init__.py
│   │   │   ├── pyproject.toml
│   │   │   └── setup.py
│   │   ├── information-gain-filtration
│   │   │   ├── igf/__init__.py
│   │   │   ├── requirements.txt
│   │   │   └── result_igf.png
│   │   ├── onnx/summarization/requirements.txt
│   │   ├── adversarial/requirements.txt
│   │   ├── bertology/requirements.txt
│   │   ├── tapex/requirements.txt
│   │   ├── layoutlmv3/requirements.txt
│   │   ├── longform-qa
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── mlm_wwm/requirements.txt
│   │   ├── xtreme-s/requirements.txt
│   │   ├── pplm
│   │   │   ├── imgs
│   │   │   │   ├── wooly.png
│   │   │   │   └── headfigure.png
│   │   │   ├── requirements.txt
│   │   │   └── pplm_classification_head.py
│   │   ├── rag
│   │   │   ├── __init__.py
│   │   │   ├── requirements.txt
│   │   │   └── finetune_rag.sh
│   │   ├── wav2vec2
│   │   │   ├── requirements.txt
│   │   │   ├── run_alignment.sh
│   │   │   ├── finetune_base_100.sh
│   │   │   ├── finetune_large_lv60_100.sh
│   │   │   ├── finetune_base_timit_asr.sh
│   │   │   ├── finetune_large_lv60_timit_asr.sh
│   │   │   ├── finetune_wav2vec2_xlsr_turkish.sh
│   │   │   ├── finetune_large_xlsr_53_arabic_speech_corpus.sh
│   │   │   └── vocab/buckwalter.json
│   │   ├── distillation
│   │   │   ├── requirements.txt
│   │   │   └── training_configs
│   │   │   │   ├── distilgpt2.json
│   │   │   │   ├── distilbert-base-cased.json
│   │   │   │   ├── distilbert-base-uncased.json
│   │   │   │   ├── distilbert-base-multilingual-cased.json
│   │   │   │   └── distilroberta-base.json
│   │   ├── jax-projects
│   │   │   ├── big_bird
│   │   │   │   ├── requirements.txt
│   │   │   │   └── sweep_flax.yaml
│   │   │   └── hybrid_clip/requirements.txt
│   │   ├── movement-pruning
│   │   │   ├── emmental
│   │   │   │   ├── modules/__init__.py
│   │   │   │   └── __init__.py
│   │   │   └── requirements.txt
│   │   ├── self-training-text-classification/requirements.txt
│   │   ├── rag-end2end-retriever
│   │   │   ├── requirements.txt
│   │   │   └── test_run
│   │   │   │   ├── dummy-train-data
│   │   │   │   │   ├── test.target
│   │   │   │   │   ├── val.target
│   │   │   │   │   ├── val.source
│   │   │   │   │   └── test.source
│   │   │   │   └── test_rag_new_features.sh
│   │   ├── lxmert/README.md
│   │   ├── performer
│   │   │   ├── full_script.sh
│   │   │   └── sanity_script.sh
│   │   ├── seq2seq-distillation
│   │   │   ├── requirements.txt
│   │   │   ├── finetune.sh
│   │   │   ├── finetune_t5.sh
│   │   │   ├── finetune_pegasus_xsum.sh
│   │   │   ├── train_mbart_cc25_enro.sh
│   │   │   ├── dynamic_bs_example.sh
│   │   │   ├── sentence_splitter.py
│   │   │   ├── distil_marian_no_teacher.sh
│   │   │   ├── train_distilbart_cnn.sh
│   │   │   ├── distil_marian_enro_teacher.sh
│   │   │   ├── train_distilbart_xsum.sh
│   │   │   └── finetune_bart_tiny.sh
│   │   ├── visual_bert/README.md
│   │   ├── vqgan-clip
│   │   │   ├── requirements.txt
│   │   │   └── utils.py
│   │   ├── mm-imdb/README.md
│   │   └── README.md
│   ├── pytorch
│   │   ├── benchmarking/requirements.txt
│   │   ├── image-pretraining/requirements.txt
│   │   ├── text-generation
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── contrastive-image-text/requirements.txt
│   │   ├── audio-classification/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── token-classification
│   │   │   ├── requirements.txt
│   │   │   ├── run.sh
│   │   │   └── run_no_trainer.sh
│   │   ├── speech-pretraining/requirements.txt
│   │   ├── speech-recognition/requirements.txt
│   │   ├── image-classification/requirements.txt
│   │   ├── multiple-choice
│   │   │   ├── requirements.txt
│   │   │   └── run_no_trainer.sh
│   │   ├── semantic-segmentation/requirements.txt
│   │   ├── language-modeling/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   ├── translation/requirements.txt
│   │   ├── summarization/requirements.txt
│   │   └── _tests_requirements.txt
│   ├── legacy
│   │   ├── seq2seq
│   │   │   ├── test_data
│   │   │   │   ├── test_data
│   │   │   │   ├── wmt_en_ro
│   │   │   │   │   ├── val.len
│   │   │   │   │   └── train.len
│   │   │   │   └── fsmt/build-eval-data.py
│   │   │   ├── __init__.py
│   │   │   ├── requirements.txt
│   │   │   ├── finetune.sh
│   │   │   ├── finetune_tpu.sh
│   │   │   ├── minify_dataset.py
│   │   │   └── rouge_cli.py
│   │   ├── pytorch-lightning
│   │   │   ├── requirements.txt
│   │   │   ├── run_glue.sh
│   │   │   └── run_pos.sh
│   │   ├── README.md
│   │   └── token-classification
│   │   │   ├── run_pos.sh
│   │   │   ├── scripts
│   │   │   │   └── preprocess.py
│   │   │   └── run_chunk.sh
│   ├── tensorflow
│   │   ├── benchmarking/requirements.txt
│   │   ├── language-modeling/requirements.txt
│   │   ├── image-classification/requirements.txt
│   │   ├── summarization/requirements.txt
│   │   ├── translation/requirements.txt
│   │   ├── multiple-choice/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── token-classification/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   └── _tests_requirements.txt
│   └── flax
│   │   ├── language-modeling/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   ├── token-classification/requirements.txt
│   │   ├── _tests_requirements.txt
│   │   ├── summarization/requirements.txt
│   │   └── vision/requirements.txt
├── docs
│   └── source
│   │   ├── en
│   │   │   ├── contributing.md
│   │   │   ├── notebooks.md
│   │   │   ├── _config.py
│   │   │   ├── perf_infer_special.mdx
│   │   │   ├── main_classes
│   │   │   │   ├── keras_callbacks.mdx
│   │   │   │   ├── configuration.mdx
│   │   │   │   └── image_processor.mdx
│   │   │   ├── perf_train_tpu.mdx
│   │   │   ├── perf_train_special.mdx
│   │   │   ├── sagemaker.mdx
│   │   │   └── perf_infer_gpu_many.mdx
│   │   ├── fr
│   │   │   ├── in_translation.mdx
│   │   │   └── _config.py
│   │   ├── ko
│   │   │   ├── in_translation.mdx
│   │   │   └── _config.py
│   │   ├── zh/_toctree.yml
│   │   ├── ja/_toctree.yml
│   │   ├── de
│   │   │   ├── _config.py
│   │   │   └── _toctree.yml
│   │   ├── es
│   │   │   ├── _config.py
│   │   │   └── sagemaker.mdx
│   │   ├── pt/_config.py
│   │   ├── _config.py
│   │   └── it/_config.py
├── .gitattributes
├── .github
│   ├── conda
│   │   ├── build.sh
│   │   └── meta.yaml
│   ├── workflows
│   │   ├── delete_doc_comment.yml
│   │   ├── TROUBLESHOOT.md
│   │   ├── build_pr_documentation.yml
│   │   ├── build_documentation.yml
│   │   ├── stale.yml
│   │   ├── update_metdata.yml
│   │   └── release-conda.yml
│   └── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── new-model-addition.yml
│   │   └── feature-request.yml
├── templates
│   ├── adding_a_new_model
│   │   ├── open_model_proposals/README.md
│   │   ├── tests
│   │   │   ├── pt-encoder-bert-tokenizer.json
│   │   │   ├── tf-encoder-bert-tokenizer.json
│   │   │   ├── encoder-bert-tokenizer.json
│   │   │   ├── flax-encoder-bert-tokenizer.json
│   │   │   ├── standalone.json
│   │   │   ├── pt-seq-2-seq-bart-tokenizer.json
│   │   │   ├── flax-seq-2-seq-bart-tokenizer.json
│   │   │   └── tf-seq-2-seq-bart-tokenizer.json
│   │   ├── cookiecutter.json
│   │   └── cookiecutter-template-{{cookiecutter.modelname}}/configuration.json
│   ├── adding_a_new_example_script/cookiecutter.json
│   └── adding_a_missing_tokenization_test/cookiecutter.json
├── .coveragerc
├── docker
│   ├── transformers-pytorch-tpu
│   │   ├── docker-entrypoint.sh
│   │   ├── dataset.yaml
│   │   └── bert-base-cased.jsonnet
│   ├── transformers-pytorch-cpu/Dockerfile
│   ├── transformers-tensorflow-cpu/Dockerfile
│   ├── transformers-cpu/Dockerfile
│   ├── transformers-gpu/Dockerfile
│   ├── transformers-doc-builder/Dockerfile
│   └── transformers-tensorflow-gpu/Dockerfile
├── .circleci/TROUBLESHOOT.md
├── scripts
│   ├── tatoeba/upload_models.sh
│   └── fsmt/tests-to-run.sh
├── setup.cfg
├── pyproject.toml
└── model_cards/README.md

/tests/__init__.py: (empty)
/tests/onnx/__init__.py: (empty)
/tests/utils/__init__.py: (empty)
/tests/benchmark/__init__.py: (empty)
/tests/fixtures/empty.txt: (empty)
/tests/generation/__init__.py: (empty)
/tests/mixed_int8/__init__.py: (empty)
/tests/models/__init__.py: (empty)
/tests/models/bit/__init__.py: (empty)
/tests/models/cpm/__init__.py: (empty)
/tests/models/cvt/__init__.py: (empty)
/tests/models/dit/__init__.py: (empty)
/tests/models/dpr/__init__.py: (empty)
/tests/models/dpt/__init__.py: (empty)
/tests/models/esm/__init__.py: (empty)
/tests/models/git/__init__.py: (empty)
/tests/models/led/__init__.py: (empty)
/tests/models/mt5/__init__.py: (empty)
/tests/models/mvp/__init__.py: (empty)
/tests/models/nat/__init__.py: (empty)
/tests/models/opt/__init__.py: (empty)
/tests/models/rag/__init__.py: (empty)
/tests/models/sew/__init__.py: (empty)
/tests/models/t5/__init__.py: (empty)
/tests/models/van/__init__.py: (empty)
/tests/models/vit/__init__.py: (empty)
/tests/models/xlm/__init__.py: (empty)
/tests/pipelines/__init__.py: (empty)
/tests/trainer/__init__.py: (empty)
/MANIFEST.in:
    include LICENSE
/tests/models/albert/__init__.py: (empty)
/tests/models/align/__init__.py: (empty)
/tests/models/altclip/__init__.py: (empty)
/tests/models/auto/__init__.py: (empty)
/tests/models/bart/__init__.py: (empty)
/tests/models/barthez/__init__.py: (empty)
/tests/models/bartpho/__init__.py: (empty)
/tests/models/beit/__init__.py: (empty)
/tests/models/bert/__init__.py: (empty)
/tests/models/bertweet/__init__.py: (empty)
/tests/models/big_bird/__init__.py: (empty)
/tests/models/biogpt/__init__.py: (empty)
/tests/models/blip/__init__.py: (empty)
/tests/models/blip_2/__init__.py: (empty)
/tests/models/bloom/__init__.py: (empty)
/tests/models/bort/__init__.py: (empty)
/tests/models/byt5/__init__.py: (empty)
/tests/models/canine/__init__.py: (empty)
/tests/models/clap/__init__.py: (empty)
/tests/models/clip/__init__.py: (empty)
/tests/models/clipseg/__init__.py: (empty)
/tests/models/codegen/__init__.py: (empty)
/tests/models/convbert/__init__.py: (empty)
/tests/models/convnext/__init__.py: (empty)
/tests/models/ctrl/__init__.py: (empty)
/tests/models/data2vec/__init__.py: (empty)
/tests/models/deberta/__init__.py: (empty)
/tests/models/deit/__init__.py: (empty)
/tests/models/deta/__init__.py: (empty)
/tests/models/detr/__init__.py: (empty)
/tests/models/dinat/__init__.py: (empty)
/tests/models/donut/__init__.py: (empty)
/tests/models/electra/__init__.py: (empty)
/tests/models/ernie/__init__.py: (empty)
/tests/models/ernie_m/__init__.py: (empty)
/tests/models/flaubert/__init__.py: (empty)
/tests/models/flava/__init__.py: (empty)
/tests/models/fnet/__init__.py: (empty)
/tests/models/fsmt/__init__.py: (empty)
/tests/models/funnel/__init__.py: (empty)
/tests/models/glpn/__init__.py: (empty)
/tests/models/gpt2/__init__.py: (empty)
/tests/models/gpt_neo/__init__.py: (empty)
/tests/models/gpt_neox/__init__.py: (empty)
/tests/models/gpt_sw3/__init__.py: (empty)
/tests/models/gptj/__init__.py: (empty)
/tests/models/groupvit/__init__.py: (empty)
/tests/models/herbert/__init__.py: (empty)
/tests/models/hubert/__init__.py: (empty)
/tests/models/ibert/__init__.py: (empty)
/tests/models/imagegpt/__init__.py: (empty)
/tests/models/jukebox/__init__.py: (empty)
/tests/models/layoutlm/__init__.py: (empty)
/tests/models/levit/__init__.py: (empty)
/tests/models/lilt/__init__.py: (empty)
/tests/models/longt5/__init__.py: (empty)
/tests/models/luke/__init__.py: (empty)
/tests/models/lxmert/__init__.py: (empty)
/tests/models/m2m_100/__init__.py: (empty)
/tests/models/marian/__init__.py: (empty)
/tests/models/markuplm/__init__.py: (empty)
/tests/models/mbart/__init__.py: (empty)
/tests/models/mbart50/__init__.py: (empty)
/tests/models/mctct/__init__.py: (empty)
/tests/models/mluke/__init__.py: (empty)
/tests/models/mpnet/__init__.py: (empty)
/tests/models/nezha/__init__.py: (empty)
/tests/models/nllb/__init__.py: (empty)
/tests/models/openai/__init__.py: (empty)
/tests/models/owlvit/__init__.py: (empty)
/tests/models/pegasus/__init__.py: (empty)
/tests/models/phobert/__init__.py: (empty)
/tests/models/plbart/__init__.py: (empty)
/tests/models/qdqbert/__init__.py: (empty)
/tests/models/realm/__init__.py: (empty)
/tests/models/reformer/__init__.py: (empty)
/tests/models/regnet/__init__.py: (empty)
/tests/models/rembert/__init__.py: (empty)
/tests/models/resnet/__init__.py: (empty)
/tests/models/roberta/__init__.py: (empty)
/tests/models/roc_bert/__init__.py: (empty)
/tests/models/roformer/__init__.py: (empty)
/tests/models/sew_d/__init__.py: (empty)
/tests/models/speecht5/__init__.py: (empty)
/tests/models/splinter/__init__.py: (empty)
/tests/models/swin/__init__.py: (empty)
/tests/models/swin2sr/__init__.py: (empty)
/tests/models/swinv2/__init__.py: (empty)
/tests/models/tapas/__init__.py: (empty)
/tests/models/tapex/__init__.py: (empty)
/tests/models/trocr/__init__.py: (empty)
/tests/models/tvlt/__init__.py: (empty)
/tests/models/upernet/__init__.py: (empty)
/tests/models/videomae/__init__.py: (empty)
/tests/models/vilt/__init__.py: (empty)
/tests/models/vit_mae/__init__.py: (empty)
/tests/models/vit_msn/__init__.py: (empty)
/tests/models/wav2vec2/__init__.py: (empty)
/tests/models/wavlm/__init__.py: (empty)
/tests/models/whisper/__init__.py: (empty)
/tests/models/x_clip/__init__.py: (empty)
/tests/models/xglm/__init__.py: (empty)
/tests/models/xlnet/__init__.py: (empty)
/tests/models/xmod/__init__.py: (empty)
/tests/models/yolos/__init__.py: (empty)
/tests/models/yoso/__init__.py: (empty)
/tests/optimization/__init__.py: (empty)
/tests/tokenization/__init__.py: (empty)
/utils/test_module/__init__.py: (empty)
/src/transformers/benchmark/__init__.py: (empty)
/tests/models/bert_japanese/__init__.py: (empty)
/tests/models/blenderbot/__init__.py: (empty)
/tests/models/bridgetower/__init__.py: (empty)
/tests/models/camembert/__init__.py: (empty)
/tests/models/chinese_clip/__init__.py: (empty)
/tests/models/deberta_v2/__init__.py: (empty)
/tests/models/distilbert/__init__.py: (empty)
/tests/models/efficientnet/__init__.py: (empty)
/tests/models/graphormer/__init__.py: (empty)
/tests/models/layoutlmv2/__init__.py: (empty)
/tests/models/layoutlmv3/__init__.py: (empty)
/tests/models/layoutxlm/__init__.py: (empty)
/tests/models/longformer/__init__.py: (empty)
/tests/models/mask2former/__init__.py: (empty)
/tests/models/maskformer/__init__.py: (empty)
/tests/models/megatron_bert/__init__.py: (empty)
/tests/models/megatron_gpt2/__init__.py: (empty)
/tests/models/mobilebert/__init__.py: (empty)
/tests/models/mobilenet_v1/__init__.py: (empty)
/tests/models/mobilenet_v2/__init__.py: (empty)
/tests/models/mobilevit/__init__.py: (empty)
/tests/models/nystromformer/__init__.py: (empty)
/tests/models/oneformer/__init__.py: (empty)
/tests/models/pegasus_x/__init__.py: (empty)
/tests/models/perceiver/__init__.py: (empty)
/tests/models/poolformer/__init__.py: (empty)
/tests/models/prophetnet/__init__.py: (empty)
/tests/models/retribert/__init__.py: (empty)
/tests/models/segformer/__init__.py: (empty)
/tests/models/squeezebert/__init__.py: (empty)
/tests/models/timesformer/__init__.py: (empty)
/tests/models/transfo_xl/__init__.py: (empty)
/tests/models/unispeech/__init__.py: (empty)
/tests/models/unispeech_sat/__init__.py: (empty)
/tests/models/visual_bert/__init__.py: (empty)
/tests/models/vit_hybrid/__init__.py: (empty)
/tests/models/xlm_roberta/__init__.py: (empty)
/src/transformers/models/bort/__init__.py: (empty)
/src/transformers/models/dit/__init__.py: (empty)
/tests/models/bert_generation/__init__.py: (empty)
/tests/models/bigbird_pegasus/__init__.py: (empty)
/tests/models/blenderbot_small/__init__.py: (empty)
/tests/models/conditional_detr/__init__.py: (empty)
/tests/models/deformable_detr/__init__.py: (empty)
/tests/models/efficientformer/__init__.py: (empty)
/tests/models/encoder_decoder/__init__.py: (empty)
/tests/models/gpt_neox_japanese/__init__.py: (empty)
/tests/models/gptsan_japanese/__init__.py: (empty)
/tests/models/speech_to_text/__init__.py: (empty)
/tests/models/speech_to_text_2/__init__.py: (empty)
/tests/models/table_transformer/__init__.py: (empty)
/tests/models/wav2vec2_conformer/__init__.py: (empty)
/tests/models/wav2vec2_phoneme/__init__.py: (empty)
/tests/models/wav2vec2_with_lm/__init__.py: (empty)
/tests/models/xlm_prophetnet/__init__.py: (empty)
/tests/models/xlm_roberta_xl/__init__.py: (empty)
/examples/research_projects/bertabs/__init__.py: (empty)
/src/transformers/models/dialogpt/__init__.py: (empty)
/tests/models/decision_transformer/__init__.py: (empty)
/tests/models/roberta_prelayernorm/__init__.py: (empty)
/tests/models/speech_encoder_decoder/__init__.py: (empty)
/tests/models/switch_transformers/__init__.py: (empty)
/tests/models/time_series_transformer/__init__.py: (empty)
/tests/models/trajectory_transformer/__init__.py: (empty)
/tests/models/vision_encoder_decoder/__init__.py: (empty)
/examples/research_projects/deebert/src/__init__.py: (empty)
/tests/models/vision_text_dual_encoder/__init__.py: (empty)
/docs/source/en/contributing.md:
    ../../../CONTRIBUTING.md
/docs/source/en/notebooks.md:
    ../../../notebooks/README.md
/docs/source/fr/in_translation.mdx:
    # Traduction en cours.
-------------------------------------------------------------------------------- /docs/source/ko/in_translation.mdx: -------------------------------------------------------------------------------- 1 | # 열심히 번역 중입니다. 조금 이따 만나요! -------------------------------------------------------------------------------- /examples/pytorch/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.3 -------------------------------------------------------------------------------- /tests/models/audio_spectrogram_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/test_data: -------------------------------------------------------------------------------- 1 | seq2seq/test_data -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.9.2 -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/igf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py eol=lf 2 | *.rst eol=lf 3 | *.md eol=lf 4 | *.mdx eol=lf -------------------------------------------------------------------------------- /examples/research_projects/deebert/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/onnx/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.10 -------------------------------------------------------------------------------- /tests/fixtures/merges.txt: -------------------------------------------------------------------------------- 1 | #version: 0.2 2 | Ġ l 3 | Ġl o 4 | Ġlo w 5 | e r 6 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 
| -------------------------------------------------------------------------------- /examples/research_projects/bertology/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? ||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 -------------------------------------------------------------------------------- /examples/research_projects/tapex/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | datasets 3 | pandas 4 | nltk -------------------------------------------------------------------------------- /examples/research_projects/layoutlmv3/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | seqeval 3 | pillow 4 | -------------------------------------------------------------------------------- /.github/conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install # Python command to install the script. 2 | -------------------------------------------------------------------------------- /examples/tensorflow/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 -------------------------------------------------------------------------------- /examples/pytorch/image-pretraining/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.8.0 -------------------------------------------------------------------------------- /examples/pytorch/text-generation/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | torch >= 1.3 4 | -------------------------------------------------------------------------------- /examples/pytorch/contrastive-image-text/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.8.0 -------------------------------------------------------------------------------- /examples/tensorflow/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=1.17.0 2 | evaluate 3 | tensorflow>=2.4 4 | -------------------------------------------------------------------------------- /examples/tensorflow/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/tensorflow/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 
-------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_entity_vocab.json: -------------------------------------------------------------------------------- 1 | {"[MASK]": 0, "[UNK]": 1, "[PAD]": 2, "DUMMY": 3, "DUMMY2": 4, "[MASK2]": 5} -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | cache* 2 | temp* 3 | !*.txt 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | tensorflow >= 2.3 4 | -------------------------------------------------------------------------------- /examples/tensorflow/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/research_projects/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | faiss-cpu 3 | streamlit 4 | elasticsearch 5 | -------------------------------------------------------------------------------- /examples/research_projects/mlm_wwm/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | ltp 5 | -------------------------------------------------------------------------------- /examples/tensorflow/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/pytorch/audio-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=1.14.0 2 | evaluate 3 | librosa 4 | torchaudio 5 | torch>=1.6 -------------------------------------------------------------------------------- /examples/pytorch/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | torch >= 1.3.0 4 | evaluate -------------------------------------------------------------------------------- /examples/research_projects/xtreme-s/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | torch >= 1.5 3 | torchaudio 4 | librosa 5 | jiwer 6 | -------------------------------------------------------------------------------- 
/tests/deepspeed/vit_feature_extractor.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "ViTFeatureExtractor", 3 | "size": 30 4 | } 5 | -------------------------------------------------------------------------------- /examples/flax/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.9 6 | -------------------------------------------------------------------------------- /examples/flax/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | jax>=0.2.17 3 | jaxlib>=0.1.68 4 | flax>=0.3.5 5 | optax>=0.0.8 -------------------------------------------------------------------------------- /examples/pytorch/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | seqeval 3 | datasets >= 1.8.0 4 | torch >= 1.3 5 | evaluate -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /examples/flax/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/pytorch/speech-pretraining/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.12.0 2 | torch >= 1.5 3 | torchaudio 4 | accelerate >= 0.12.0 5 | librosa -------------------------------------------------------------------------------- /examples/pytorch/speech-recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | torch >= 1.5 3 | torchaudio 4 | librosa 5 | jiwer 6 | evaluate 7 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/test_sentencepiece_bpe.model -------------------------------------------------------------------------------- /examples/flax/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | seqeval -------------------------------------------------------------------------------- /examples/pytorch/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.12.0 2 | 
torch>=1.5.0 3 | torchvision>=0.6.0 4 | datasets>=1.17.0 5 | evaluate -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | torch >= 1.3 5 | evaluate 6 | -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/examples/research_projects/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /utils/test_module/custom_tokenization.py: -------------------------------------------------------------------------------- 1 | from transformers import BertTokenizer 2 | 3 | 4 | class CustomTokenizer(BertTokenizer): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/flax/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | pytest 3 | conllu 4 | nltk 5 | rouge-score 6 | seqeval 7 | tensorboard 8 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/flax/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | evaluate>=0.2.0 7 | -------------------------------------------------------------------------------- /examples/pytorch/semantic-segmentation/requirements.txt: -------------------------------------------------------------------------------- 1 | git://github.com/huggingface/accelerate.git 2 | datasets >= 2.0.0 3 | torch >= 1.3 4 | evaluate -------------------------------------------------------------------------------- /examples/research_projects/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /tests/fixtures/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor", 3 | "processor_class": "Wav2Vec2Processor" 4 | } -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe_char.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/test_sentencepiece_bpe_char.model -------------------------------------------------------------------------------- /docs/source/zh/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers简介 4 | - local: quicktour 5 | title: 快速上手 6 | title: 
开始使用 -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/examples/legacy/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/examples/research_projects/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/tensorflow/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | tensorflow >= 2.3 5 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/tests_samples/COCO/000000039769.png -------------------------------------------------------------------------------- /tests/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def is_sagemaker_available(): 5 | return importlib.util.find_spec("sagemaker") is not None 6 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/examples/legacy/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /tests/fixtures/vocab.txt: -------------------------------------------------------------------------------- 1 | [PAD] 2 | [SEP] 3 | [MASK] 4 | [CLS] 5 | [unused3] 6 | [unused4] 7 | [unused5] 8 | [unused6] 9 | [unused7] 10 | [unused8] 11 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/examples/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==2.3.2 2 | transformers==4.21.1 3 | wandb==0.13.1 4 | evaluate==0.2.2 5 | scikit-learn==1.1.2 -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy>=1.17.2 3 | joblib>=0.13.2 4 | scipy 5 | torch>=1.10.1 6 | transformers>=3.5 -------------------------------------------------------------------------------- /tests/fixtures/dummy_feature_extractor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor", 3 | "processor_class": "Wav2Vec2Processor" 4 | } 5 | -------------------------------------------------------------------------------- /utils/test_module/custom_image_processing.py: -------------------------------------------------------------------------------- 1 | from transformers import CLIPImageProcessor 2 | 3 
| 4 | class CustomImageProcessor(CLIPImageProcessor): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | torch>=1.5.0 4 | torchaudio 5 | jiwer==2.2.0 6 | lang-trans==0.6.0 7 | librosa==0.8.0 8 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_with_bytefallback.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/test_sentencepiece_with_bytefallback.model -------------------------------------------------------------------------------- /templates/adding_a_new_model/open_model_proposals/README.md: -------------------------------------------------------------------------------- 1 | Currently the following model proposals are available: 2 | 3 | - [BigBird (Google)](./ADD_BIG_BIRD.md) 4 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.1.30 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /utils/test_module/custom_feature_extraction.py: -------------------------------------------------------------------------------- 1 | from transformers import Wav2Vec2FeatureExtractor 2 | 3 | 4 | class CustomFeatureExtractor(Wav2Vec2FeatureExtractor): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/pytorch/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | torch >= 1.3 3 | datasets >= 1.8.0 4 | sentencepiece != 0.1.92 5 | protobuf 6 | evaluate 7 | scikit-learn 8 | -------------------------------------------------------------------------------- /examples/research_projects/jax-projects/big_bird/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers@main 2 | datasets 3 | sentencepiece 4 | wandb 5 | flax 6 | jsonlines 7 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png -------------------------------------------------------------------------------- /examples/pytorch/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | scipy 5 | scikit-learn 6 | protobuf 7 | torch >= 1.3 8 | evaluate -------------------------------------------------------------------------------- /examples/pytorch/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | sacrebleu >= 1.4.12 6 | py7zr 7 | torch >= 1.3 
8 | evaluate -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 2 | from .masked_nn import MaskedLinear 3 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/tensorflow/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@main # install main or adjust it with vX.X.X for installing a specific transformers version -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/result_igf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpaviot/transformers/main/examples/research_projects/information-gain-filtration/result_igf.png -------------------------------------------------------------------------------- /examples/research_projects/fsner/src/fsner/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import FSNERModel 2 | from .tokenizer_utils import FSNERTokenizerUtils 3 | 4 | 5 | __all__ = ["FSNERModel", "FSNERTokenizerUtils"] 6 | -------------------------------------------------------------------------------- /examples/research_projects/self-training-text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | protobuf 4 | scikit-learn 5 | scipy 6 | sentencepiece != 0.1.92 7 | torch >= 1.3 8 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@main # install main or adjust it with vX.X.X for installing a specific transformers version 2 | datasets==1.8.0 -------------------------------------------------------------------------------- /examples/pytorch/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | rouge-score 6 | nltk 7 | py7zr 8 | torch >= 1.3 9 | evaluate 10 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=57.4.0", 4 | "wheel>=0.37.0", 5 | "transformers>=4.9.2" 6 | ] 7 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.7.2 2 | datasets 3 | psutil >= 5.9.1 4 | torch >= 1.11.0 5 | pytorch-lightning == 1.6.4 6 | nvidia-ml-py3 == 7.352.0 7 | ray >= 1.13.0 -------------------------------------------------------------------------------- /examples/research_projects/rag/requirements.txt: -------------------------------------------------------------------------------- 1
| faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | ray >= 1.10.0 6 | pytorch-lightning >= 1.5.10, <=1.6.0 7 | transformers 8 | GitPython -------------------------------------------------------------------------------- /utils/test_module/custom_processing.py: -------------------------------------------------------------------------------- 1 | from transformers import ProcessorMixin 2 | 3 | 4 | class CustomProcessor(ProcessorMixin): 5 | feature_extractor_class = "AutoFeatureExtractor" 6 | tokenizer_class = "AutoTokenizer" 7 | -------------------------------------------------------------------------------- /examples/research_projects/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 4 | 2. install reqs: ``pip install -r ./requirements.txt`` 5 | 3. usage is as shown in demo.ipynb 6 | -------------------------------------------------------------------------------- /tests/fixtures/vocab.json: -------------------------------------------------------------------------------- 1 | {"l": 0, "o": 1, "w": 2, "e": 3, "r": 4, "s": 5, "t": 6, "i": 7, "d": 8, "n": 9, "Ġ": 10, "Ġl": 11, "Ġn": 12, "Ġlo": 13, "Ġlow": 14, "er": 15, "Ġlowest": 16, "Ġnewer": 17, "Ġwider": 18, "": 19, "<|endoftext|>": 20} 2 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_embd": 768, 5 | "n_head": 12, 6 | "n_layer": 6, 7 | "n_positions": 1024, 8 | "vocab_size": 50257 9 | } -------------------------------------------------------------------------------- /docs/source/ja/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers 4 | - local: installation 5 | title: インストール 6 | title: はじめに 7 | - sections: 8 | - sections: 9 | - local: multilingual 10 | title: 推論のための多言語モデル -------------------------------------------------------------------------------- /examples/flax/vision/requirements.txt: -------------------------------------------------------------------------------- 1 | jax>=0.2.8 2 | jaxlib>=0.1.59 3 | flax>=0.3.5 4 | optax>=0.0.8 5 | -f https://download.pytorch.org/whl/torch_stable.html 6 | torch==1.9.0+cpu 7 | -f https://download.pytorch.org/whl/torch_stable.html 8 | torchvision==0.10.0+cpu -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /utils/test_module/custom_tokenization_fast.py: -------------------------------------------------------------------------------- 1 | from transformers import BertTokenizerFast 2 | 3 | from .custom_tokenization import CustomTokenizer 4 | 5 | 6 | class CustomTokenizerFast(BertTokenizerFast): 7 | slow_tokenizer_class = CustomTokenizer 8 | pass 9 | 
-------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/run_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python alignment.py \ 3 | --model_name="arijitx/wav2vec2-xls-r-300m-bengali" \ 4 | --wav_dir="./wavs" \ 5 | --text_file="script.txt" \ 6 | --input_wavs_sr=48000 \ 7 | --output_dir="./out_alignment" \ 8 | --cuda 9 | -------------------------------------------------------------------------------- /examples/research_projects/jax-projects/hybrid_clip/requirements.txt: -------------------------------------------------------------------------------- 1 | jax>=0.2.8 2 | jaxlib>=0.1.59 3 | flax>=0.3.5 4 | optax>=0.0.8 5 | -f https://download.pytorch.org/whl/torch_stable.html 6 | torch==1.9.0+cpu 7 | -f https://download.pytorch.org/whl/torch_stable.html 8 | torchvision==0.10.0+cpu -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/test.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons 9 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.19.0 2 | datasets==1.16.0 3 | wandb==0.12.0 4 | tensorboard==2.6.0 5 | torch==1.11.0 6 | huggingface-hub==0.1.0 7 | git+https://github.com/huggingface/accelerate.git@3c45b6f760ad8745be9ebc9bbb26f5b04dea4abe 8 | datasketch==1.5.7 9 | dpu_utils -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /src/transformers/utils/constants.py: -------------------------------------------------------------------------------- 1 | IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] 2 | IMAGENET_DEFAULT_STD = [0.229, 0.224,
0.225] 3 | IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5] 4 | IMAGENET_STANDARD_STD = [0.5, 0.5, 0.5] 5 | OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073] 6 | OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711] 7 | -------------------------------------------------------------------------------- /src/transformers/models/yoso/common_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | #define MAX_THREADS_PER_BLOCK 1024 3 | #define OPTIMAL_THREADS_PER_BLOCK 256 4 | #define WARP_SIZE 32 5 | #define MAX_NUM_BLOCK_X 2147483647 6 | #define MAX_NUM_BLOCK_Y 65535 7 | #define MAX_NUM_BLOCK_Z 65535 8 | #define MAX_SHARED_MEM_PER_BLOCK 48000 9 | #define FULL_MASK 0xffffffff 10 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete dev documentation 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | package: transformers 14 | -------------------------------------------------------------------------------- /src/transformers/models/yoso/common.h: -------------------------------------------------------------------------------- 1 | 2 | #define min(a, b) ((a)<(b)?(a):(b)) 3 | #define max(a, b) ((a)>(b)?(a):(b)) 4 | #define ceil_divide(a, b) ((a)/(b)+((a)%(b)!=0)) 5 | #define select(cond, a, b) ((cond)?(a):(b)) 6 | #define PI 3.141592 7 | #define EPSILON 1e-8 8 | #define MAX_VAL 1e12 9 | #define MIN_VAL -1e12 10 | #define EMPTY_VALUE -1 11 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? -------------------------------------------------------------------------------- /.circleci/TROUBLESHOOT.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | This is a document explaining how to deal with various issues on Circle-CI. The entries may include actual solutions or pointers to Issues that cover those.
4 | 5 | ## Circle CI 6 | 7 | * pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408 8 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/test.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? 9 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | ray 22 | -------------------------------------------------------------------------------- /scripts/tatoeba/upload_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for FILE in converted/*; do 4 | model_name=`basename $FILE` 5 | huggingface-cli repo create $model_name -y 6 | git clone https://huggingface.co/Helsinki-NLP/$model_name 7 | mv $FILE/* $model_name/ 8 | cd $model_name 9 | git add . && git commit -m "initial commit" 10 | git push 11 | cd .. 12 | done 13 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_keras_nlp_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class TFGPT2Tokenizer(metaclass=DummyObject): 6 | _backends = ["keras_nlp"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["keras_nlp"]) 10 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_bert_masked import MaskedBertConfig 2 | from .modeling_bert_masked import ( 3 | MaskedBertForMultipleChoice, 4 | MaskedBertForQuestionAnswering, 5 | MaskedBertForSequenceClassification, 6 | MaskedBertForTokenClassification, 7 | MaskedBertModel, 8 | ) 9 | from .modules import * 10 | -------------------------------------------------------------------------------- /examples/research_projects/performer/full_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.en --model_name_or_path bert-large-cased --tokenizer_name bert-large-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | psutil 4 | sacrebleu 5 | rouge-score 6 | tensorflow_datasets 7 | pytorch-lightning 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | SLOW_TO_FAST_CONVERTERS = None 6 | 7 | 8 | def convert_slow_tokenizer(*args, **kwargs): 9 | requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"]) 10 | -------------------------------------------------------------------------------- /examples/research_projects/performer/sanity_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.simple --model_name_or_path bert-base-cased --tokenizer_name bert-base-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /src/transformers/utils/dummy_tensorflow_text_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class TFBertTokenizer(metaclass=DummyObject): 6 | _backends = ["tensorflow_text"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["tensorflow_text"]) 10 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"], 8 | "with_trainer": ["True", "False"] 9 | } -------------------------------------------------------------------------------- /examples/research_projects/pplm/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | transformers==3.5.1 23 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | 
"n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /templates/adding_a_missing_tokenization_test/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "has_slow_class": ["True", "False"], 7 | "has_fast_class": ["True", "False"], 8 | "slow_tokenizer_use_sentencepiece": ["True", "False"], 9 | "authors": "The HuggingFace Team" 10 | } 11 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /.github/workflows/TROUBLESHOOT.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | This is a document explaining how to deal with various issues on github-actions self-hosted CI. The entries may include actually solutions or pointers to Issues that cover those. 4 | 5 | ## GitHub Actions (self-hosted CI) 6 | 7 | * Deepspeed 8 | 9 | - if jit build hangs, clear out `rm -rf ~/.cache/torch_extensions/` reference: https://github.com/huggingface/transformers/pull/12723 10 | -------------------------------------------------------------------------------- /examples/research_projects/visual_bert/README.md: -------------------------------------------------------------------------------- 1 | # VisualBERT Demo 2 | 3 | This demo shows usage of VisualBERT VQA model and is adapted from LXMERT demo present [here](https://github.com/huggingface/transformers/blob/main/examples/research_projects/lxmert/demo.ipynb). 4 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 5 | 2. install reqs: ``pip install -r ./requirements.txt`` 6 | 3. 
usage is as shown in demo.ipynb 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | line_length = 119 8 | lines_after_imports = 2 9 | multi_line_output = 3 10 | use_parentheses = True 11 | 12 | [flake8] 13 | ignore = E203, E501, E741, W503, W605 14 | max-line-length = 119 15 | 16 | [tool:pytest] 17 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS -------------------------------------------------------------------------------- /examples/research_projects/vqgan-clip/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | gradio 3 | icecream 4 | imageio 5 | lpips 6 | matplotlib 7 | more_itertools 8 | numpy 9 | omegaconf 10 | opencv_python_headless 11 | Pillow 12 | pudb 13 | pytorch_lightning 14 | PyYAML 15 | requests 16 | scikit_image 17 | scipy 18 | setuptools 19 | streamlit 20 | taming-transformers 21 | torch 22 | torchvision 23 | tqdm 24 | transformers==4.26.0 25 | tokenizers==0.13.2 26 | typing_extensions 27 | wandb 28 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/flax-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateFLAX", 3 | "uppercase_modelname": "TEMPLATE_FLAX", 4 | 
"lowercase_modelname": "template_flax", 5 | "camelcase_modelname": "TemplateFlax", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | "lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /utils/test_module/custom_configuration.py: -------------------------------------------------------------------------------- 1 | from transformers import PretrainedConfig 2 | 3 | 4 | class CustomConfig(PretrainedConfig): 5 | model_type = "custom" 6 | 7 | def __init__(self, attribute=1, **kwargs): 8 | self.attribute = attribute 9 | super().__init__(**kwargs) 10 | 11 | 12 | class NoSuperInitConfig(PretrainedConfig): 13 | model_type = "custom" 14 | 15 | def __init__(self, attribute=1, **kwargs): 16 | self.attribute = attribute 17 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "PTNewENCDEC", 3 | "uppercase_modelname": "PT_NEW_ENC_DEC", 4 | "lowercase_modelname": "pt_new_enc_dec_template", 5 | "camelcase_modelname": "PtNewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "pt-new-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } 
-------------------------------------------------------------------------------- /examples/research_projects/jax-projects/big_bird/sweep_flax.yaml: -------------------------------------------------------------------------------- 1 | command: 2 | - python3 3 | - train.py 4 | method: random 5 | parameters: 6 | lr: 7 | values: [4e-5, 3e-5] 8 | warmup_steps: 9 | values: [20000, 15000, 10000, 5000] 10 | weight_decay: 11 | distribution: normal 12 | mu: 1e-2 13 | sigma: 2e-3 14 | metric: 15 | name: eval_loss 16 | goal: minimize 17 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_detectron2_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import requires_backends 3 | 4 | 5 | LAYOUTLM_V2_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class LayoutLMv2Model: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["detectron2"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["detectron2"]) 15 | -------------------------------------------------------------------------------- /examples/pytorch/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu >= 1.4.12 6 | git+https://github.com/huggingface/accelerate@main#egg=accelerate 7 | rouge-score 8 | tensorflow_datasets 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.13.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | torchvision 23 | jiwer 24 | librosa 25 | evaluate >= 0.2.0 26 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/flax-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "FlaxNewENCDEC", 3 | "uppercase_modelname": "FLAX_NEW_ENC_DEC", 4 | "lowercase_modelname": "flax_new_enc_dec_template", 5 | "camelcase_modelname": "FlaxNewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-flax-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "Flax", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewTFENCDEC", 3 | "uppercase_modelname": "NEW_TF_ENC_DEC", 4 | "lowercase_modelname": "new_tf_enc_dec_template", 5 | "camelcase_modelname": "NewTFEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-tf-enc-dec-base_template", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "TensorFlow", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /examples/tensorflow/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow<2.11 2 | tensorboard 3 | scikit-learn 4 | seqeval 5 | psutil 6 | sacrebleu >= 1.4.12 7 | git+https://github.com/huggingface/accelerate@main#egg=accelerate 8 | 
rouge-score 9 | tensorflow_datasets 10 | matplotlib 11 | git-python==1.0.3 12 | faiss-cpu 13 | streamlit 14 | elasticsearch 15 | nltk 16 | pandas 17 | datasets >= 1.13.3 18 | fire 19 | pytest 20 | conllu 21 | sentencepiece != 0.1.92 22 | protobuf 23 | jiwer 24 | librosa 25 | evaluate >= 0.2.0 26 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_rag_new_features.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="../":"${PYTHONPATH}" 2 | 3 | python use_own_knowledge_dataset.py 4 | 5 | ray start --head 6 | python finetune_rag.py \ 7 | --model_name_or_path facebook/rag-token-base \ 8 | --model_type rag_token \ 9 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 10 | --fp16 \ 11 | --gpus 1 \ 12 | --profile \ 13 | --end2end \ 14 | --index_name custom 15 | 16 | ray stop 17 | -------------------------------------------------------------------------------- /src/transformers/models/esm/openfold_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .chunk_utils import chunk_layer 2 | from .data_transforms import make_atom14_masks 3 | from .feats import atom14_to_atom37, frames_and_literature_positions_to_atom14_pos, torsion_angles_to_frames 4 | from .loss import compute_predicted_aligned_error, compute_tm 5 | from .protein import Protein as OFProtein 6 | from .protein import to_pdb 7 | from .rigid_utils import Rigid, Rotation 8 | from .tensor_utils import dict_multimap, flatten_final_dims, permute_final_dims 9 | -------------------------------------------------------------------------------- /docs/source/ko/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers 설치 방법 4 | ! pip install transformers datasets 5 | # 마지막 릴리스 대신 소스에서 설치하려면, 위 명령을 주석으로 바꾸고 아래 명령을 해제하세요. 6 | # ! 
pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: transformers 17 | languages: de en es fr it ko pt zh 18 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | - use_templates 10 | 11 | jobs: 12 | build: 13 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 14 | with: 15 | commit_sha: ${{ github.sha }} 16 | package: transformers 17 | notebook_folder: transformers_doc 18 | languages: de en es fr it ko pt zh 19 | secrets: 20 | token: ${{ secrets.HUGGINGFACE_PUSH }} 21 | -------------------------------------------------------------------------------- /docs/source/de/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/es/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/en/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! 
pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/pt/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt: -------------------------------------------------------------------------------- 1 | [{"id": 8222595, "category_id": 17, "iscrowd": 0, "bbox": [18, 54, 301, 415], "area": 53306}, {"id": 8225432, "category_id": 17, "iscrowd": 0, "bbox": [349, 26, 291, 343], "area": 59627}, {"id": 8798150, "category_id": 63, "iscrowd": 0, "bbox": [1, 0, 639, 474], "area": 174579}, {"id": 14466198, "category_id": 75, "iscrowd": 0, "bbox": [42, 74, 133, 45], "area": 4068}, {"id": 12821912, "category_id": 75, "iscrowd": 0, "bbox": [333, 80, 38, 106], "area": 2118}, {"id": 10898909, "category_id": 93, "iscrowd": 0, "bbox": [0, 0, 640, 480], "area": 2750}] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Model checkpoints on the Hugging Face Hub 5 | url: https://huggingface.co/models 6 | about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub 7 | - name: Website Related 8 | url: https://github.com/huggingface/hub-docs/issues 9 | about: Feature requests and bug reports related to the website 10 | - name: Forum 11 | url: https://discuss.huggingface.co/ 12 | about: General usage questions and community discussions 13 | -------------------------------------------------------------------------------- /docs/source/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets evaluate 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/fr/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation de Transformers 4 | ! pip install transformers datasets 5 | # Pour installer à partir du code source au lieu de la dernière version, commentez la commande ci-dessus et décommentez la suivante. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["C901", "E501", "E741", "W605"] 8 | select = ["C", "E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | "src/transformers/file_utils.py" = ["F401"] 15 | "src/transformers/utils/dummy_*.py" = ["F401"] 16 | 17 | [tool.ruff.isort] 18 | lines-after-imports = 2 19 | known-first-party = ["transformers"] 20 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /tests/fixtures/add_distilbert_like_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_copied_from": true, 3 | "old_model_type": "distilbert", 4 | "new_model_patterns": { 5 | "model_name": "BERT New", 6 | "checkpoint": "huggingface/bert-new-base", 7 | "model_type": "bert-new", 8 | "model_lower_cased": "bert_new", 9 | "model_camel_cased": "BertNew", 10 | "model_upper_cased": "BERT_NEW", 11 | "config_class": "BertNewConfig", 12 | "tokenizer_class": "DistilBertTokenizer" 13 | }, 14 | "frameworks": [ 15 | "pt", 16 | "tf", 17 | "flax" 18 | ] 19 | } -------------------------------------------------------------------------------- /docs/source/it/_config.py: -------------------------------------------------------------------------------- 1 | # 
docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installazione di Transformers 4 | ! pip install transformers datasets 5 | # Per installare dalla fonte invece dell'ultima versione rilasciata, commenta il comando sopra e 6 | # rimuovi la modalità commento al comando seguente. 7 | # ! pip install git+https://github.com/huggingface/transformers.git 8 | """ 9 | 10 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 11 | black_avoid_patterns = { 12 | "{processor_class}": "FakeProcessorClass", 13 | "{model_class}": "FakeModelClass", 14 | "{object_class}": "FakeObjectClass", 15 | } 16 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/transformers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python scripts/stale.py 28 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"], 9 | "generate_tensorflow_pytorch_and_flax": [ 10 | "PyTorch, TensorFlow and Flax", 11 | "PyTorch & TensorFlow", 12 | "PyTorch & Flax", 13 | "TensorFlow & Flax", 14 | "PyTorch", 15 | "TensorFlow", 16 | "Flax" 17 | ], 18 | "is_encoder_decoder_model": ["True", "False"] 19 | } 20 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | Author: @yjernite 4 | 5 | This folder contains the code for the Long Form Question answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 6 | 7 | You can use these methods to train your own system by following along the associate [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html). 
8 | -------------------------------------------------------------------------------- /src/transformers/models/gpt2/CONVERSION.md: -------------------------------------------------------------------------------- 1 | Here is how to convert a GPT2 model generated outside of `transformers` 2 | 3 | * [Megatron-LM](https://github.com/NVIDIA/Megatron-LM)-generated model: 4 | 5 | Use [convert_megatron_gpt2_checkpoint.py](../megatron_gpt2/convert_megatron_gpt2_checkpoint.py) 6 | 7 | * [big-science fork of Megatron-Deepspeed](https://github.com/bigscience-workshop/Megatron-DeepSpeed/)-generated model: 8 | 9 | Use the instructions [here](https://github.com/bigscience-workshop/bigscience/tree/aa872e754106f6678e8a9dac8c6962404ba39a6d/train/tr1-13B-base#checkpoint-conversion-and-upload). This approach uses a set of scripts that require the use of this particular fork of Megatron-Deepspeed. 10 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_pytorch_and_flax": "{{cookiecutter.generate_tensorflow_pytorch_and_flax}}", 10 | "is_encoder_decoder_model": "{{cookiecutter.is_encoder_decoder_model}}" 11 | } 12 | -------------------------------------------------------------------------------- /src/transformers/models/megatron_gpt2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 NVIDIA Corporation and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /docs/source/de/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers 4 | - local: quicktour 5 | title: Schnellstart 6 | - local: installation 7 | title: Installation 8 | title: Erste Schritte 9 | - sections: 10 | - local: pipeline_tutorial 11 | title: Pipelines für Inferenzen 12 | - local: autoclass_tutorial 13 | title: Laden von vortrainierten Instanzen mit einer AutoClass 14 | - local: preprocessing 15 | title: Vorverarbeiten 16 | - local: training 17 | title: Optimierung eines vortrainierten Modells 18 | - local: accelerate 19 | title: Verteiltes Training mit 🤗 Accelerate 20 | - local: model_sharing 21 | title: Ein Modell teilen 22 | title: Tutorials 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="32" \ 6 | --per_device_eval_batch_size="32" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-base" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="16" \ 6 | --per_device_eval_batch_size="16" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 
| -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="20" \ 6 | --per_device_eval_batch_size="20" \ 7 | --evaluation_strategy="steps" \ 8 | --save_steps="500" \ 9 | --eval_steps="100" \ 10 | --logging_steps="50" \ 11 | --learning_rate="5e-4" \ 12 | --warmup_steps="3000" \ 13 | --model_name_or_path="facebook/wav2vec2-base" \ 14 | --fp16 \ 15 | --dataset_name="timit_asr" \ 16 | --train_split_name="train" \ 17 | --validation_split_name="test" \ 18 | --orthography="timit" \ 19 | --preprocessing_num_workers="$(nproc)" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor \ 22 | --verbose_logging \ 23 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 
24 | 25 | CMD ["/bin/bash"] 26 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 --eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 25 | 26 | CMD ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /examples/research_projects/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class ClassificationHead(nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (nn.Linear(embed_size, class_size)) 13 | self.mlp = nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = nn.functional.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | re.sub("", "", x) # remove pegasus newline char 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. 
(pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | python finetune.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --do_predict \ 9 | --fp16 \ 10 | --val_check_interval 0.25 \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --freeze_encoder --freeze_embeds \ 14 | --train_batch_size=$BS --eval_batch_size=$BS \ 15 | --tokenizer_name $m --model_name_or_path $m \ 16 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 17 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /docs/source/en/perf_infer_special.mdx: -------------------------------------------------------------------------------- 1 | 11 | 12 | # Inference on Specialized Hardware 13 | 14 | This document will be completed soon with information on how to infer on specialized hardware. In the meantime you can check out [the guide for inference on CPUs](perf_infer_cpu). -------------------------------------------------------------------------------- /examples/research_projects/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/main/examples/research_projects/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 
6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | --val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="2" \ 6 | --per_device_eval_batch_size="2" \ 7 | --gradient_accumulation_steps="4" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="timit_asr" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --orthography="timit" \ 20 | --preprocessing_num_workers="$(nproc)" \ 21 | --group_by_length \ 22 | --freeze_feature_extractor \ 23 | --verbose_logging \ 24 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .trainer_sm import SageMakerTrainer 16 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 17 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_common_voice.py \ 3 | --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \ 4 | --dataset_config_name="tr" \ 5 | --output_dir=./wav2vec2-large-xlsr-turkish-demo \ 6 | --overwrite_output_dir \ 7 | --num_train_epochs="5" \ 8 | --per_device_train_batch_size="16" \ 9 | --evaluation_strategy="steps" \ 10 | --learning_rate="3e-4" \ 11 | --warmup_steps="500" \ 12 | --fp16 \ 13 | --freeze_feature_extractor \ 14 | --save_steps="400" \ 15 | --eval_steps="400" \ 16 | --save_total_limit="3" \ 17 | --logging_steps="400" \ 18 | --group_by_length \ 19 | --feat_proj_dropout="0.0" \ 20 | --layerdrop="0.1" \ 21 | --gradient_checkpointing \ 22 | --do_train --do_eval 23 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python3 run_ner.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --do_train \ 20 | --do_eval 21 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | accelerate launch run_swag_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name swag \ 18 | --output_dir /tmp/test-swag-no-trainer \ 19 | --pad_to_max_length 20 | -------------------------------------------------------------------------------- /src/transformers/models/deformable_detr/custom_kernel/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_enro_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 --fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. 
\ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /utils/test_module/custom_modeling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from transformers import PreTrainedModel 4 | 5 | from .custom_configuration import CustomConfig, NoSuperInitConfig 6 | 7 | 8 | class CustomModel(PreTrainedModel): 9 | config_class = CustomConfig 10 | 11 | def __init__(self, config): 12 | super().__init__(config) 13 | self.linear = torch.nn.Linear(config.hidden_size, config.hidden_size) 14 | 15 | def forward(self, x): 16 | return self.linear(x) 17 | 18 | def _init_weights(self, module): 19 | pass 20 | 21 | 22 | class NoSuperInitModel(PreTrainedModel): 23 | config_class = NoSuperInitConfig 24 | 25 | def __init__(self, config): 26 | super().__init__(config) 27 | self.linear = torch.nn.Linear(config.attribute, config.attribute) 28 | 29 | def forward(self, x): 30 | return self.linear(x) 31 | 32 | def _init_weights(self, module): 33 | pass 34 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-xlsr-53-arabic-speech-corpus" \ 4 | --num_train_epochs="50" \ 5 | --per_device_train_batch_size="1" \ 6 | --per_device_eval_batch_size="1" \ 7 | --gradient_accumulation_steps="8" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="elgeish/wav2vec2-large-xlsr-53-arabic" \ 15 | --fp16 \ 16 | --dataset_name="arabic_speech_corpus" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --max_duration_in_seconds="15" \ 20 | --orthography="buckwalter" \ 21 | --preprocessing_num_workers="$(nproc)" \ 22 | --group_by_length \ 23 | --freeze_feature_extractor \ 24 | --target_feature_extractor_sampling_rate \ 25 | --verbose_logging \ 26 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | accelerate launch run_ner_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --pad_to_max_length \ 20 | --task_name ner \ 21 | --return_entity_level_metrics 22 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/dataset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: huggingface-cluster-disk 5 | spec: 6 | storageClassName: "" 7 | capacity: 8 | storage: 500Gi 9 | accessModes: 10 | - ReadOnlyMany 11 | claimRef: 12 | namespace: default 13 | name: huggingface-cluster-disk-claim 14 | gcePersistentDisk: 15 | pdName: huggingface-cluster-disk 16 | fsType: ext4 17 | readOnly: true 18 | --- 19 | apiVersion: v1 20 | kind: PersistentVolumeClaim 21 | metadata: 22 | name: huggingface-cluster-disk-claim 23 | spec: 24 | # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass. 25 | # A nil storageClassName value uses the default StorageClass. For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /examples/legacy/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Legacy examples 18 | 19 | This folder contains examples which are not actively maintained (mostly contributed by the community). 20 | 21 | Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working. 
22 | -------------------------------------------------------------------------------- /docs/source/en/main_classes/keras_callbacks.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Keras callbacks 14 | 15 | When training a Transformers model with Keras, there are some library-specific callbacks available to automate common 16 | tasks: 17 | 18 | ## KerasMetricCallback 19 | 20 | [[autodoc]] KerasMetricCallback 21 | 22 | ## PushToHubCallback 23 | 24 | [[autodoc]] PushToHubCallback 25 | -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | RUN git clone https://github.com/NVIDIA/apex 22 | RUN cd apex && \ 23 | python3 setup.py install && \ 24 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 25 | 26 | WORKDIR /workspace 27 | COPY . transformers/ 28 | RUN cd transformers/ && \ 29 | python3 -m pip install --no-cache-dir . 30 | 31 | CMD ["/bin/bash"] 32 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | with open("README.md", "r", encoding="utf-8") as fh: 5 | long_description = fh.read() 6 | 7 | setuptools.setup( 8 | name="fsner", 9 | version="0.0.1", 10 | author="msi sayef", 11 | author_email="msi.sayef@gmail.com", 12 | description="Few-shot Named Entity Recognition", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/huggingface/transformers/tree/main/examples/research_projects/fsner", 16 | project_urls={ 17 | "Bug Tracker": "https://github.com/huggingface/transformers/issues", 18 | }, 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "Operating System :: OS Independent", 22 | ], 23 | package_dir={"": "src"}, 24 | packages=setuptools.find_packages(where="src"), 25 | python_requires=">=3.6", 26 | install_requires=["torch>=1.9.0", "transformers>=4.9.2"], 27 | ) 28 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .glue import GlueDataset, GlueDataTrainingArguments 16 | from .language_modeling import ( 17 | LineByLineTextDataset, 18 | LineByLineWithRefDataset, 19 | LineByLineWithSOPTextDataset, 20 | TextDataset, 21 | TextDatasetForNextSentencePrediction, 22 | ) 23 | from .squad import SquadDataset, SquadDataTrainingArguments 24 | -------------------------------------------------------------------------------- /docs/source/en/perf_train_tpu.mdx: -------------------------------------------------------------------------------- 1 | 11 | 12 | # Training on TPUs 13 | 14 | 15 | 16 | Note: Most of the strategies introduced in the [single GPU section](perf_train_gpu_one) (such as mixed precision training or gradient accumulation) and [multi-GPU section](perf_train_gpu_many) are generic and apply to training models in general so make sure to have a look at it before diving into this section. 17 | 18 | 19 | 20 | This document will be completed soon with information on how to train on TPUs. 21 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_bart_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/vocab/buckwalter.json: -------------------------------------------------------------------------------- 1 | { 2 | "": 0, 3 | "": 1, 4 | "": 2, 5 | "": 3, 6 | "/": 4, 7 | "'": 5, 8 | "|": 6, 9 | ">": 7, 10 | "&": 8, 11 | "<": 9, 12 | "}": 10, 13 | "A": 11, 14 | "b": 12, 15 | "p": 13, 16 | "t": 14, 17 | "v": 15, 18 | "j": 16, 19 | "H": 17, 20 | "x": 18, 21 | "d": 19, 22 | "*": 20, 23 | "r": 21, 24 | "z": 22, 25 | "s": 23, 26 | "$": 24, 27 | "S": 25, 28 | "D": 26, 29 | "T": 27, 30 | "Z": 28, 31 | "E": 29, 32 | "g": 30, 33 | "_": 31, 34 | "f": 32, 35 | "q": 33, 36 | "k": 34, 37 | "l": 35, 38 | "m": 36, 39 | "n": 37, 40 | "h": 38, 41 | "w": 39, 42 | "Y": 40, 43 | "y": 41, 44 | "F": 42, 45 | "N": 43, 46 | "K": 44, 47 | "a": 45, 48 | "u": 46, 49 | "i": 47, 50 | "~": 48, 51 | "o": 49, 52 | "`": 50, 53 | "{": 51, 54 | "P": 52, 55 | "J": 53, 56 | "V": 54, 57 | "G": 55 58 | } -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/initialize_model.py: -------------------------------------------------------------------------------- 1 | from arguments import InitializationArguments 2 | 3 | from transformers import AutoConfig, 
AutoModelForCausalLM, AutoTokenizer, HfArgumentParser 4 | 5 | 6 | # Configuration 7 | parser = HfArgumentParser(InitializationArguments) 8 | args = parser.parse_args() 9 | 10 | # Load codeparrot tokenizer trained for Python code tokenization 11 | tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name) 12 | 13 | # Config: "scale_attn_by_layer_idx" and "reorder_and_upcast_attn" are Mistral stability tweaks 14 | config_kwargs = { 15 | "vocab_size": len(tokenizer), 16 | "scale_attn_by_inverse_layer_idx": True, 17 | "reorder_and_upcast_attn": True, 18 | } 19 | 20 | # Load model config (GPT-2 large in this case) 21 | config = AutoConfig.from_pretrained(args.config_name, **config_kwargs) 22 | 23 | # Initialize new model with config 24 | model = AutoModelForCausalLM.from_config(config) 25 | 26 | # Save model to the hub 27 | model.save_pretrained(args.model_name, push_to_hub=args.push_to_hub) 28 | -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | bleu_data = json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /docs/source/en/perf_train_special.mdx: -------------------------------------------------------------------------------- 1 | 11 | 12 | # Training on Specialized Hardware 13 | 14 | 
15 | 16 | Note: Most of the strategies introduced in the [single GPU section](perf_train_gpu_one) (such as mixed precision training or gradient accumulation) and [multi-GPU section](perf_train_gpu_many) are generic and apply to training models in general so make sure to have a look at it before diving into this section. 17 | 18 | 19 | 20 | This document will be completed soon with information on how to train on specialized hardware. 21 | -------------------------------------------------------------------------------- /src/transformers/models/tapex/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ...file_utils import _LazyModule 17 | 18 | 19 | _import_structure = {"tokenization_tapex": ["TapexTokenizer"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .tokenization_tapex import TapexTokenizer 24 | 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 30 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_glue.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size $BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 
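# Early-exit entropy thresholds to sweep below: in DeeBERT an example exits at the first off-ramp whose prediction entropy falls below the threshold, so larger values trigger earlier exits (faster, typically less accurate), while 0 effectively disables early exit.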
15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /src/transformers/models/byt5/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_byt5": ["ByT5Tokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_byt5 import ByT5Tokenizer 25 | else: 26 | import sys 27 | 28 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 29 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 16 | # run ./finetune.sh --help to see all the possible options 17 | python finetune_trainer.py \ 18 | --learning_rate=3e-5 \ 19 | --fp16 \ 20 | --do_train --do_eval --do_predict \ 21 | --evaluation_strategy steps \ 22 | --predict_with_generate \ 23 | --n_val 1000 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_phobert": ["PhobertTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_phobert import PhobertTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
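# Re-export the GLUE, SQuAD and XNLI processors together with the generic DataProcessor/InputExample/InputFeatures helpers.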
14 | 15 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 16 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 17 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 18 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 19 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_bertweet": ["BertweetTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_bertweet import BertweetTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /docker/transformers-doc-builder/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | LABEL maintainer="Hugging Face" 3 | 4 | RUN apt update 5 | RUN git clone https://github.com/huggingface/transformers 6 | 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder ./transformers[dev] 8 | RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr 9 | 10 | # Torch needs to be installed before deepspeed 11 | RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed] 12 | 13 | RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract 14 | RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com 15 | RUN python3 -m pip install -U "itsdangerous<2.1.0" 16 | 17 | # Test if the image could successfully build the doc. before publishing the image 18 | RUN doc-builder build transformers transformers/docs/source/en --build_dir doc-build-dev --notebook_dir notebooks/transformers_doc --clean 19 | RUN rm -rf doc-build-dev -------------------------------------------------------------------------------- /src/transformers/models/wav2vec2_with_lm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import _LazyModule 17 | 18 | 19 | _import_structure = {"processing_wav2vec2_with_lm": ["Wav2Vec2ProcessorWithLM"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .processing_wav2vec2_with_lm import Wav2Vec2ProcessorWithLM 24 | else: 25 | import sys 26 | 27 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 28 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/bert-base-cased.jsonnet: -------------------------------------------------------------------------------- 1 | local base = import 'templates/base.libsonnet'; 2 | local tpus = import 'templates/tpus.libsonnet'; 3 | local utils = import "templates/utils.libsonnet"; 4 | local volumes = import "templates/volumes.libsonnet"; 5 | 6 | local bertBaseCased = base.BaseTest { 7 | frameworkPrefix: "hf", 8 | modelName: "bert-base-cased", 9 | mode: "example", 10 | configMaps: [], 11 | 12 | timeout: 3600, # 1 hour, in seconds 13 | 14 | image: std.extVar('image'), 15 | imageTag: std.extVar('image-tag'), 16 | 17 | tpuSettings+: { 18 | softwareVersion: "pytorch-nightly", 19 | }, 20 | accelerator: tpus.v3_8, 21 | 22 | volumeMap+: { 23 | datasets: volumes.PersistentVolumeSpec { 24 | name: "huggingface-cluster-disk", 25 | mountPath: "/datasets", 26 | }, 27 | }, 28 | command: utils.scriptCommand( 29 | ||| 30 | python -m pytest -s transformers/examples/pytorch/test_xla_examples.py -v 31 | test_exit_code=$? 32 | echo "\nFinished running commands.\n" 33 | test $test_exit_code -eq 0 34 | ||| 35 | ), 36 | }; 37 | 38 | bertBaseCased.oneshotJob 39 | -------------------------------------------------------------------------------- /src/transformers/models/wav2vec2_phoneme/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
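# The import structure below is lazy: Wav2Vec2PhonemeCTCTokenizer is only imported on first attribute access via _LazyModule.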
14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import _LazyModule 17 | 18 | 19 | _import_structure = {"tokenization_wav2vec2_phoneme": ["Wav2Vec2PhonemeCTCTokenizer"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .tokenization_wav2vec2_phoneme import Wav2Vec2PhonemeCTCTokenizer 24 | else: 25 | import sys 26 | 27 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 28 | -------------------------------------------------------------------------------- /examples/research_projects/vqgan-clip/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import matplotlib.pyplot as plt 4 | import torch 5 | 6 | 7 | def freeze_module(module): 8 | for param in module.parameters(): 9 | param.requires_grad = False 10 | 11 | 12 | def get_device(): 13 | device = "cuda" if torch.cuda.is_available() else "cpu" 14 | if torch.backends.mps.is_available() and torch.backends.mps.is_built(): 15 | device = "mps" 16 | if device == "mps": 17 | print( 18 | "WARNING: MPS currently doesn't seem to work, and messes up backpropagation without any visible torch" 19 | " errors. I recommend using CUDA on a colab notebook or CPU instead if you're facing inexplicable issues" 20 | " with generations." 21 | ) 22 | return device 23 | 24 | 25 | def show_pil(img): 26 | fig = plt.imshow(img) 27 | fig.axes.get_xaxis().set_visible(False) 28 | fig.axes.get_yaxis().set_visible(False) 29 | plt.show() 30 | 31 | 32 | def get_timestamp(): 33 | current_time = datetime.now() 34 | timestamp = current_time.strftime("%H:%M:%S") 35 | return timestamp 36 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | export TPU_NUM_CORES=8 16 | 17 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 18 | # run ./finetune_tpu.sh --help to see all the possible options 19 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 20 | finetune_trainer.py \ 21 | --learning_rate=3e-5 \ 22 | --do_train --do_eval \ 23 | --evaluation_strategy steps \ 24 | --prediction_loss_only \ 25 | --n_val 1000 \ 26 | "$@" 27 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class ASTFeatureExtractor(metaclass=DummyObject): 6 | _backends = ["speech"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["speech"]) 10 | 11 | 12 | class MCTCTFeatureExtractor(metaclass=DummyObject): 13 | _backends = ["speech"] 14 | 15 | def __init__(self, *args, **kwargs): 16 | requires_backends(self, ["speech"]) 17 | 18 | 19 | class Speech2TextFeatureExtractor(metaclass=DummyObject): 20 | _backends = ["speech"] 21 | 22 | def __init__(self, *args, **kwargs): 23 | requires_backends(self, ["speech"]) 24 | 25 | 26 | class SpeechT5FeatureExtractor(metaclass=DummyObject): 27 | _backends = ["speech"] 28 | 29 | def __init__(self, *args, **kwargs): 30 | requires_backends(self, ["speech"]) 31 | 32 | 33 | class TvltFeatureExtractor(metaclass=DummyObject): 34 | _backends = ["speech"] 35 | 36 | def __init__(self, *args, **kwargs): 37 | requires_backends(self, ["speech"]) 38 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. 
The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200 11 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/tests/test_deduplicate.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from datasets import Dataset 4 | from minhash_deduplication import deduplicate_dataset, make_duplicate_clusters 5 | 6 | 7 | def get_dataset(): 8 | data_dict = { 9 | "repo_name": ["test_repo1", "test_repo2", "test_repo3"], 10 | "path": ["test_1.py", "test_2.py", "unit_test.py"], 11 | "content": ["a " * 20, "a " * 30, "b " * 7], 12 | } 13 | dataset = Dataset.from_dict(data_dict) 14 | return dataset 15 | 16 | 17 | class MakeDuplicateClustersTest(TestCase): 18 | def test_make_duplicate_clusters(self): 19 | ds = get_dataset() 20 | duplicate_clusters = make_duplicate_clusters(ds, 0.85) 21 | self.assertEqual(len(duplicate_clusters[0]), 2) 22 | 23 | def test_deduplicate_dataset(self): 24 | ds = get_dataset() 25 | ds_filter, duplicate_clusters = deduplicate_dataset(ds) 26 | self.assertEqual(len(ds_filter), 2) 27 | print(duplicate_clusters) 28 | self.assertEqual(duplicate_clusters[0][0]["copies"], 2) 29 | self.assertEqual(duplicate_clusters[0][0]["is_extreme"], True) 30 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 
3.750 11 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/bpe_training.py: -------------------------------------------------------------------------------- 1 | from arguments import TokenizerTrainingArguments 2 | from datasets import load_dataset 3 | from tqdm import tqdm 4 | 5 | from transformers import AutoTokenizer, HfArgumentParser 6 | from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode 7 | 8 | 9 | # Iterator for Training 10 | def batch_iterator(batch_size=10): 11 | for _ in tqdm(range(0, args.n_examples, batch_size)): 12 | yield [next(iter_dataset)[args.text_column] for _ in range(batch_size)] 13 | 14 | 15 | # Configuration 16 | parser = HfArgumentParser(TokenizerTrainingArguments) 17 | args = parser.parse_args() 18 | 19 | # Base tokenizer 20 | tokenizer = AutoTokenizer.from_pretrained(args.base_tokenizer) 21 | base_vocab = list(bytes_to_unicode().values()) 22 | 23 | # Load dataset 24 | dataset = load_dataset(args.dataset_name, split="train", streaming=True) 25 | iter_dataset = iter(dataset) 26 | 27 | 28 | # Training and saving 29 | new_tokenizer = tokenizer.train_new_from_iterator( 30 | batch_iterator(), vocab_size=args.vocab_size, initial_alphabet=base_vocab 31 | ) 32 | new_tokenizer.save_pretrained(args.tokenizer_name, push_to_hub=args.push_to_hub) 33 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import warnings 15 | 16 | from ..trainer import Trainer 17 | from ..utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class SageMakerTrainer(Trainer): 24 | def __init__(self, args=None, **kwargs): 25 | warnings.warn( 26 | "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. 
You can use `Trainer` " 27 | "instead.", 28 | FutureWarning, 29 | ) 30 | super().__init__(args=args, **kwargs) 31 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 2 | LABEL maintainer="Hugging Face" 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt update 7 | RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg 8 | RUN python3 -m pip install --no-cache-dir --upgrade pip 9 | 10 | ARG REF=main 11 | RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF 12 | RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-tensorflow,testing] 13 | 14 | # If set to nothing, will install the latest version 15 | ARG TENSORFLOW='2.11' 16 | 17 | RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' || VERSION='tensorflow'; python3 -m pip install --no-cache-dir -U $VERSION 18 | RUN python3 -m pip uninstall -y torch flax 19 | RUN python3 -m pip install -U "itsdangerous<2.1.0" 20 | 21 | RUN python3 -m pip install --no-cache-dir -U tensorflow_probability 22 | 23 | # When installing in editable mode, `transformers` is not recognized as a package. 24 | # this line must be added in order for python to be aware of transformers. 25 | RUN cd transformers && python3 setup.py develop 26 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
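# Lazily exposes the Japanese BERT tokenizers: BertJapaneseTokenizer plus the MeCab-based MecabTokenizer and the character-level CharacterTokenizer it can delegate to.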
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune_rag.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --profile \ 15 | --do_train \ 16 | --do_predict \ 17 | --n_val -1 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 \ 35 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type POS \ 26 | --data_dir . 
\ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type Chunk \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /model_cards/README.md: -------------------------------------------------------------------------------- 1 | ## 🔥 Model cards now live inside each huggingface.co model repo 🔥 2 | 3 | 4 | For consistency, ease of use and scalability, `README.md` model cards now live directly inside each model repo on the HuggingFace model hub. 
5 | 6 | ### How to update a model card 7 | 8 | You can directly update a model card inside any model repo you have **write access** to, i.e.: 9 | - a model under your username namespace 10 | - a model under any organization you are a part of. 11 | 12 | You can either: 13 | - update it, commit and push using your usual git workflow (command line, GUI, etc.) 14 | - or edit it directly from the website's UI. 15 | 16 | **What if you want to create or update a model card for a model you don't have write access to?** 17 | 18 | In that case, you can open a [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions)! Check out the [announcement](https://huggingface.co/blog/community-update) of this feature for more details 🤗. 19 | 20 | ### What happened to the model cards here? 21 | 22 | We migrated every model card from the repo to its corresponding huggingface.co model repo. Individual commits were preserved, and they link back to the original commit on GitHub. 23 | -------------------------------------------------------------------------------- /.github/workflows/update_metdata.yml: -------------------------------------------------------------------------------- 1 | name: Update Transformers metadata 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - update_transformers_metadata 8 | 9 | jobs: 10 | build_and_package: 11 | runs-on: ubuntu-latest 12 | defaults: 13 | run: 14 | shell: bash -l {0} 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Load cached virtual environment 20 | uses: actions/cache@v2 21 | id: cache 22 | with: 23 | path: ~/venv/ 24 | key: v3-metadata-${{ hashFiles('setup.py') }} 25 | 26 | - name: Create virtual environment on cache miss 27 | if: steps.cache.outputs.cache-hit != 'true' 28 | run: | 29 | python -m venv ~/venv && . ~/venv/bin/activate 30 | pip install --upgrade pip 31 | 32 | - name: Setup environment 33 | run: | 34 | . ~/venv/bin/activate 35 | pip install git+https://github.com/huggingface/transformers#egg=transformers[dev] 36 | 37 | - name: Update metadata 38 | run: | 39 | . ~/venv/bin/activate 40 | python utils/update_metadata.py --token ${{ secrets.SYLVAIN_HF_TOKEN }} --commit_sha ${{ github.sha }} 41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model addition" 2 | description: Submit a proposal/request to implement a new model 3 | labels: [ "New model" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model description 12 | description: | 13 | Put any and all important information relative to the model 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `transformers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available" 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 
32 | -------------------------------------------------------------------------------- /.github/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "transformers" %} 2 | 3 | package: 4 | name: "{{ name|lower }}" 5 | version: "{{ TRANSFORMERS_VERSION }}" 6 | 7 | source: 8 | path: ../../ 9 | 10 | build: 11 | noarch: python 12 | 13 | requirements: 14 | host: 15 | - python 16 | - pip 17 | - numpy >=1.17 18 | - dataclasses 19 | - importlib_metadata 20 | - huggingface_hub 21 | - packaging 22 | - filelock 23 | - requests 24 | - tqdm >=4.27 25 | - sacremoses 26 | - regex !=2019.12.17 27 | - protobuf 28 | - tokenizers >=0.11.1,!=0.11.3,<0.13 29 | - pyyaml >=5.1 30 | run: 31 | - python 32 | - numpy >=1.17 33 | - dataclasses 34 | - importlib_metadata 35 | - huggingface_hub 36 | - packaging 37 | - filelock 38 | - requests 39 | - tqdm >=4.27 40 | - sacremoses 41 | - regex !=2019.12.17 42 | - protobuf 43 | - tokenizers >=0.11.1,!=0.11.3,<0.13 44 | - pyyaml >=5.1 45 | 46 | test: 47 | imports: 48 | - transformers 49 | 50 | about: 51 | home: https://huggingface.co 52 | license: Apache License 2.0 53 | license_file: LICENSE 54 | summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0." 55 | -------------------------------------------------------------------------------- /docs/source/en/sagemaker.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Run training on Amazon SageMaker 18 | 19 | The documentation has been moved to [hf.co/docs/sagemaker](https://huggingface.co/docs/sagemaker). This page will be removed in `transformers` 5.0. 20 | 21 | ### Table of Content 22 | 23 | - [Train Hugging Face models on Amazon SageMaker with the SageMaker Python SDK](https://huggingface.co/docs/sagemaker/train) 24 | - [Deploy Hugging Face models to Amazon SageMaker with the SageMaker Python SDK](https://huggingface.co/docs/sagemaker/inference) 25 | - [Frequently Asked Questions](https://huggingface.co/docs/sagemaker/faq) 26 | -------------------------------------------------------------------------------- /src/transformers/generation_tf_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import TFGenerationMixin 20 | 21 | 22 | class TFGenerationMixin(TFGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. 
Import as `from transformers import TFGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature request" 2 | description: Submit a proposal/request for a new transformers feature 3 | labels: [ "feature" ] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 22 | 23 | 24 | - type: textarea 25 | id: contribution 26 | validations: 27 | required: true 28 | attributes: 29 | label: Your contribution 30 | description: | 31 | Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md) 32 | -------------------------------------------------------------------------------- /docs/source/es/sagemaker.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Ejecutar el entrenamiento en Amazon SageMaker 18 | 19 | La documentación ha sido trasladada a [hf.co/docs/sagemaker](https://huggingface.co/docs/sagemaker). Esta página será eliminada en `transformers` 5.0. 20 | 21 | ### Tabla de contenido 22 | 23 | - [Entrenar modelos de Hugging Face en Amazon SageMaker con SageMaker Python SDK](https://huggingface.co/docs/sagemaker/train) 24 | - [Desplegar modelos de Hugging Face en Amazon SageMaker con SageMaker Python SDK](https://huggingface.co/docs/sagemaker/inference) 25 | - [Preguntas Frecuentes](https://huggingface.co/docs/sagemaker/faq) 26 | -------------------------------------------------------------------------------- /src/transformers/generation_flax_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Google AI Flax Team Authors, and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import FlaxGenerationMixin 20 | 21 | 22 | class FlaxGenerationMixin(FlaxGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `FlaxGenerationMixin` from `src/transformers/generation_flax_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. 
Import as `from transformers import FlaxGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /src/transformers/generation_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Language Team Authors, Facebook AI Research authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import GenerationMixin 20 | 21 | 22 | class GenerationMixin(GenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Research projects 18 | 19 | This folder contains various research projects using 🤗 Transformers. They are not maintained and require a specific 20 | version of 🤗 Transformers that is indicated in the requirements file of each folder. Updating them to the most recent version of the library will require some work. 21 | 22 | To use any of them, just run the command 23 | ``` 24 | pip install -r requirements.txt 25 | ``` 26 | inside the folder of your choice. 27 | 28 | If you need help with any of those, contact the author(s), indicated at the top of the `README` of each folder. 
29 | -------------------------------------------------------------------------------- /utils/test_module/custom_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from transformers import Pipeline 4 | 5 | 6 | def softmax(outputs): 7 | maxes = np.max(outputs, axis=-1, keepdims=True) 8 | shifted_exp = np.exp(outputs - maxes) 9 | return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) 10 | 11 | 12 | class PairClassificationPipeline(Pipeline): 13 | def _sanitize_parameters(self, **kwargs): 14 | preprocess_kwargs = {} 15 | if "second_text" in kwargs: 16 | preprocess_kwargs["second_text"] = kwargs["second_text"] 17 | return preprocess_kwargs, {}, {} 18 | 19 | def preprocess(self, text, second_text=None): 20 | return self.tokenizer(text, text_pair=second_text, return_tensors=self.framework) 21 | 22 | def _forward(self, model_inputs): 23 | return self.model(**model_inputs) 24 | 25 | def postprocess(self, model_outputs): 26 | logits = model_outputs.logits[0].numpy() 27 | probabilities = softmax(logits) 28 | 29 | best_class = np.argmax(probabilities) 30 | label = self.model.config.id2label[best_class] 31 | score = probabilities[best_class].item() 32 | logits = logits.tolist() 33 | return {"label": label, "score": score, "logits": logits} 34 | -------------------------------------------------------------------------------- /docs/source/en/perf_infer_gpu_many.mdx: -------------------------------------------------------------------------------- 1 | 11 | 12 | # Efficient Inference on Multiple GPUs 13 | 14 | This document contains information on how to efficiently infer on multiple GPUs. 15 | 16 | 17 | Note: A multi-GPU setup can use the majority of the strategies described in the [single GPU section](./perf_infer_gpu_one). You should be aware, though, of a few simple techniques that can make better use of your setup. 18 | 19 | 20 | 21 | ## `BetterTransformer` for faster inference 22 | 23 | We have recently integrated `BetterTransformer` for faster inference on multi-GPU for text, image and audio models. Check the documentation about this integration [here](https://huggingface.co/docs/optimum/bettertransformer/overview) for more details.
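A minimal sketch of what this can look like is shown below. It is an illustration rather than the canonical recipe: it assumes `optimum` and `accelerate` are installed, and uses `facebook/opt-1.3b` purely as a placeholder checkpoint whose architecture is supported by `BetterTransformer`.

```py
# Hedged sketch: spread a causal LM across the available GPUs with device_map="auto",
# then swap in the fused BetterTransformer kernels before generating.
from optimum.bettertransformer import BetterTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "facebook/opt-1.3b"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")

# Replace the supported attention/feed-forward modules with their fused implementations
model = BetterTransformer.transform(model)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

For the list of supported architectures and the exact API, refer to the Optimum documentation linked above.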
24 | -------------------------------------------------------------------------------- /.github/workflows/release-conda.yml: -------------------------------------------------------------------------------- 1 | name: Release - Conda 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | branches: 8 | - conda_* 9 | 10 | env: 11 | ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} 12 | 13 | jobs: 14 | build_and_package: 15 | runs-on: ubuntu-latest 16 | defaults: 17 | run: 18 | shell: bash -l {0} 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v1 23 | 24 | - name: Install miniconda 25 | uses: conda-incubator/setup-miniconda@v2 26 | with: 27 | auto-update-conda: true 28 | auto-activate-base: false 29 | python-version: 3.8 30 | activate-environment: "build-transformers" 31 | channels: huggingface 32 | 33 | - name: Setup conda env 34 | run: | 35 | conda install -c defaults anaconda-client conda-build 36 | 37 | - name: Extract version 38 | run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV 39 | 40 | - name: Build conda packages 41 | run: | 42 | conda info 43 | conda list 44 | conda-build .github/conda 45 | 46 | - name: Upload to Anaconda 47 | run: anaconda upload `conda-build .github/conda --output` --force 48 | -------------------------------------------------------------------------------- /docs/source/en/main_classes/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | The base class [`PretrainedConfig`] implements the common methods for loading/saving a configuration 16 | either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded 17 | from HuggingFace's AWS S3 repository). 18 | 19 | Each derived config class implements model specific attributes. Common attributes present in all config classes are: 20 | `hidden_size`, `num_attention_heads`, and `num_hidden_layers`. Text models further implement: 21 | `vocab_size`. 22 | 23 | 24 | ## PretrainedConfig 25 | 26 | [[autodoc]] PretrainedConfig 27 | - push_to_hub 28 | - all 29 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_pos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /src/transformers/models/deformable_detr/custom_kernel/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor ms_deform_attn_cuda_forward( 15 | const at::Tensor &value, 16 | const at::Tensor &spatial_shapes, 17 | const at::Tensor &level_start_index, 18 | const at::Tensor &sampling_loc, 19 | const at::Tensor &attn_weight, 20 | const int im2col_step); 21 | 22 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const at::Tensor &grad_output, 29 | const int im2col_step); 30 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | from pathlib import Path 17 | 18 | import fire 19 | 20 | 21 | def minify(src_dir: str, dest_dir: str, n: int): 22 | """Write first n lines of each file f in src_dir to dest_dir/f""" 23 | src_dir = Path(src_dir) 24 | dest_dir = Path(dest_dir) 25 | dest_dir.mkdir(exist_ok=True) 26 | for path in src_dir.iterdir(): 27 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 28 | dest_path = dest_dir.joinpath(path.name) 29 | print(dest_path) 30 | dest_path.open("w").write("\n".join(new)) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(minify) 35 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import fire 16 | 17 | from utils import calculate_rouge, save_json 18 | 19 | 20 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 21 | """Kwargs will be passed to calculate_rouge""" 22 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 23 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 24 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 25 | if save_path is not None: 26 | save_json(metrics, save_path, indent=None) 27 | return metrics # these print nicely 28 | 29 | 30 | if __name__ == "__main__": 31 | fire.Fire(calculate_rouge_path) 32 | -------------------------------------------------------------------------------- /examples/pytorch/text-generation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Language generation 18 | 19 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py). 20 | 21 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 22 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you 23 | can try out the different models available in the library. 24 | 25 | Example usage: 26 | 27 | ```bash 28 | python run_generation.py \ 29 | --model_type=gpt2 \ 30 | --model_name_or_path=gpt2 31 | ``` 32 | -------------------------------------------------------------------------------- /src/transformers/models/deformable_detr/custom_kernel/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor 15 | ms_deform_attn_cpu_forward( 16 | const at::Tensor &value, 17 | const at::Tensor &spatial_shapes, 18 | const at::Tensor &level_start_index, 19 | const at::Tensor &sampling_loc, 20 | const at::Tensor &attn_weight, 21 | const int im2col_step); 22 | 23 | std::vector<at::Tensor> 24 | ms_deform_attn_cpu_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | -------------------------------------------------------------------------------- /src/transformers/models/dpt/feature_extraction_dpt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for DPT.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_dpt import DPTImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class DPTFeatureExtractor(DPTImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class DPTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use DPTImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /src/transformers/models/vit/feature_extraction_vit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | """Feature extractor class for ViT.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_vit import ViTImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class ViTFeatureExtractor(ViTImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use ViTImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /docs/source/en/main_classes/image_processor.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Image Processor 14 | 15 | An image processor is in charge of preparing input features for vision models and post processing their outputs. This includes transformations such as resizing, normalization, and conversion to PyTorch, TensorFlow, Flax and Numpy tensors. It may also include model specific post-processing such as converting logits to segmentation masks. 16 | 17 | 18 | ## ImageProcessingMixin 19 | 20 | [[autodoc]] image_processing_utils.ImageProcessingMixin 21 | - from_pretrained 22 | - save_pretrained 23 | 24 | ## BatchFeature 25 | 26 | [[autodoc]] BatchFeature 27 | 28 | ## BaseImageProcessor 29 | 30 | [[autodoc]] image_processing_utils.BaseImageProcessor 31 | -------------------------------------------------------------------------------- /src/transformers/models/beit/feature_extraction_beit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for BEiT.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_beit import BeitImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class BeitFeatureExtractor(BeitImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class BeitFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use BeitImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /src/transformers/models/clip/feature_extraction_clip.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for CLIP.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_clip import CLIPImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class CLIPFeatureExtractor(CLIPImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use CLIPImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /src/transformers/models/deit/feature_extraction_deit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for DeiT.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_deit import DeiTImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class DeiTFeatureExtractor(DeiTImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class DeiTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use DeiTImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /src/transformers/models/detr/feature_extraction_detr.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Feature extractor class for DETR.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_detr import DetrImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class DetrFeatureExtractor(DetrImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class DetrFeatureExtractor is deprecated and will be removed in version 5 of Transformers." 30 | " Please use DetrImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | --------------------------------------------------------------------------------