├── tests
│   ├── __init__.py
│   ├── tools/__init__.py
│   ├── utils/__init__.py
│   ├── benchmark/__init__.py
│   ├── fixtures
│   │   ├── empty.txt
│   │   ├── dummy-config.json
│   │   ├── merges.txt
│   │   ├── input.txt
│   │   ├── test_entity_vocab.json
│   │   ├── tests_samples
│   │   │   ├── .gitignore
│   │   │   ├── COCO
│   │   │   │   ├── 000000039769.png
│   │   │   │   ├── coco_panoptic/000000039769.png
│   │   │   │   └── coco_panoptic_annotations.txt
│   │   │   ├── GermEval/labels.txt
│   │   │   └── STS-B
│   │   │       ├── train.tsv
│   │   │       └── dev.tsv
│   │   ├── spiece.model
│   │   ├── preprocessor_config.json
│   │   ├── test_sentencepiece.model
│   │   ├── test_sentencepiece_bpe.model
│   │   ├── vocab.txt
│   │   ├── dummy_feature_extractor_config.json
│   │   ├── test_sentencepiece_no_bos.model
│   │   ├── test_sentencepiece_bpe_char.model
│   │   ├── test_sentencepiece_with_bytefallback.model
│   │   ├── vocab.json
│   │   └── add_distilbert_like_config.json
│   ├── generation/__init__.py
│   ├── models
│   │   ├── __init__.py
│   │   └── one test package per model, each containing only __init__.py:
│   │       bit, cpm, cvt, dit, dpr, dpt, esm, git, led, mpt, mra, mt5, mvp,
│   │       nat, opt, pvt, rag, sam, sew, t5, vit, xlm, albert, align, altclip,
│   │       auto, bark, bart, barthez, bartpho, beit, bert, bertweet, big_bird,
│   │       biogpt, blip, blip_2, bloom, bros, byt5, canine, clap, clip,
│   │       clipseg, codegen, convbert, convnext, cpmant, ctrl, data2vec,
│   │       deberta, deit, deta, detr, dinat, dinov2, donut, electra, encodec,
│   │       ernie, ernie_m, falcon, flaubert, flava, fnet, focalnet, fsmt,
│   │       funnel, glpn, gpt2, gpt_neo, gpt_neox, gpt_sw3, gptj, groupvit,
│   │       herbert, hubert, ibert, idefics, imagegpt, informer, jukebox,
│   │       layoutlm, levit, lilt, llama, longt5, luke, lxmert, m2m_100,
│   │       marian, markuplm, mbart, mbart50, mega, mgp_str, mluke, mpnet,
│   │       musicgen, nezha, nllb, nllb_moe, openai, owlvit, pegasus, phobert,
│   │       plbart, qdqbert, realm, reformer, regnet, rembert, resnet, roberta,
│   │       roc_bert, roformer, rwkv, sew_d, speecht5, splinter, swin, swin2sr,
│   │       swinv2, tapas, trocr, tvlt, upernet, videomae, vilt, vit_mae,
│   │       vit_msn, vitdet, vits, vivit, wav2vec2, wavlm, whisper, x_clip,
│   │       xglm, xlnet, xmod, yolos, yoso, autoformer, bert_japanese,
│   │       blenderbot, bridgetower, camembert, chinese_clip, code_llama,
│   │       convnextv2, deberta_v2, distilbert, efficientnet, gpt_bigcode,
│   │       graphormer, instructblip, layoutlmv2, layoutlmv3, layoutxlm,
│   │       longformer, mask2former, maskformer, megatron_bert, megatron_gpt2,
│   │       mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2,
│   │       nystromformer, oneformer, pegasus_x, perceiver, persimmon,
│   │       pix2struct, poolformer, pop2piano, prophetnet, segformer,
│   │       squeezebert, swiftformer, timesformer, timm_backbone, transfo_xl,
│   │       umt5, unispeech, unispeech_sat, visual_bert, vit_hybrid,
│   │       xlm_roberta, bert_generation, bigbird_pegasus, blenderbot_small,
│   │       conditional_detr, deformable_detr, efficientformer,
│   │       encoder_decoder, gpt_neox_japanese, gptsan_japanese,
│   │       speech_to_text, speech_to_text_2, table_transformer,
│   │       wav2vec2_conformer, wav2vec2_phoneme, wav2vec2_with_lm,
│   │       xlm_prophetnet, xlm_roberta_xl, decision_transformer,
│   │       roberta_prelayernorm, speech_encoder_decoder, switch_transformers,
│   │       time_series_transformer, vision_encoder_decoder,
│   │       vision_text_dual_encoder, audio_spectrogram_transformer
│   ├── pipelines/__init__.py
│   ├── trainer/__init__.py
│   ├── optimization/__init__.py
│   ├── tokenization/__init__.py
│   ├── bettertransformer/__init__.py
│   ├── quantization
│   │   ├── bnb/__init__.py
│   │   └── gptq/__init__.py
│   ├── deepspeed/vit_feature_extractor.json
│   └── sagemaker
│       ├── __init__.py
│       └── scripts
│           ├── tensorflow/requirements.txt
│           └── pytorch/requirements.txt
├── docs
│   └── source
│       ├── en
│       │   ├── notebooks.md
│       │   ├── contributing.md
│       │   ├── _config.py
│       │   ├── perf_infer_special.md
│       │   ├── main_classes/keras_callbacks.md
│       │   ├── perf_train_tpu.md
│       │   └── perf_train_special.md
│       ├── zh/_toctree.yml
│       ├── fr
│       │   ├── in_translation.md
│       │   └── _config.py
│       ├── ko
│       │   ├── in_translation.md
│       │   ├── _config.py
│       │   └── perf_infer_gpu_many.md
│       ├── ja/_toctree.yml
│       ├── de
│       │   ├── _config.py
│       │   └── _toctree.yml
│       ├── es/_config.py
│       ├── pt/_config.py
│       ├── _config.py
│       └── it
│           ├── _config.py
│           ├── perf_infer_special.md
│           ├── perf_train_tpu.md
│           └── perf_train_special.md
├── utils
│   ├── test_module
│   │   ├── __init__.py
│   │   ├── custom_tokenization.py
│   │   ├── custom_image_processing.py
│   │   ├── custom_feature_extraction.py
│   │   ├── custom_processing.py
│   │   ├── custom_tokenization_fast.py
│   │   ├── custom_configuration.py
│   │   ├── custom_modeling.py
│   │   └── custom_pipeline.py
│   └── slow_documentation_tests.txt
├── src
│   └── transformers
│       ├── benchmark/__init__.py
│       ├── models
│       │   ├── dit/__init__.py
│       │   ├── deprecated
│       │   │   ├── __init__.py
│       │   │   ├── bort/__init__.py
│       │   │   └── tapex/__init__.py
│       │   ├── dialogpt/__init__.py
│       │   ├── esm/openfold_utils/__init__.py
│       │   ├── gpt2/CONVERSION.md
│       │   ├── megatron_gpt2/__init__.py
│       │   ├── byt5/__init__.py
│       │   ├── phobert/__init__.py
│       │   ├── bertweet/__init__.py
│       │   ├── wav2vec2_with_lm/__init__.py
│       │   ├── wav2vec2_phoneme/__init__.py
│       │   └── bert_japanese/__init__.py
│       ├── utils
│       │   ├── constants.py
│       │   ├── dummy_keras_nlp_objects.py
│       │   ├── dummy_sentencepiece_and_tokenizers_objects.py
│       │   ├── dummy_tensorflow_text_objects.py
│       │   ├── dummy_detectron2_objects.py
│       │   ├── dummy_music_objects.py
│       │   ├── dummy_speech_objects.py
│       │   ├── dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
│       │   └── bitsandbytes.py
│       ├── kernels
│       │   ├── yoso
│       │   │   ├── common_cuda.h
│       │   │   └── common.h
│       │   ├── deformable_detr
│       │   │   ├── vision.cpp
│       │   │   ├── cuda/ms_deform_attn_cuda.h
│       │   │   └── cpu/ms_deform_attn_cpu.h
│       │   └── mra/cuda_launch.h
│       ├── sagemaker
│       │   ├── __init__.py
│       │   └── trainer_sm.py
│       ├── data
│       │   ├── datasets/__init__.py
│       │   └── processors/__init__.py
│       ├── commands/__init__.py
│       ├── generation_tf_utils.py
│       ├── generation_flax_utils.py
│       └── generation_utils.py
├── examples
│   ├── research_projects
│   │   ├── bertabs
│   │   │   ├── __init__.py
│   │   │   └── requirements.txt
│   │   ├── deebert
│   │   │   ├── src/__init__.py
│   │   │   ├── requirements.txt
│   │   │   ├── eval_deebert.sh
│   │   │   ├── entropy_eval.sh
│   │   │   └── train_deebert.sh
│   │   ├── codeparrot
│   │   │   ├── scripts
│   │   │   │   ├── tests
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── test_deduplicate.py
│   │   │   │   ├── initialize_model.py
│   │   │   │   └── bpe_training.py
│   │   │   ├── examples/requirements.txt
│   │   │   └── requirements.txt
│   │   ├── bert-loses-patience
│   │   │   ├── pabee/__init__.py
│   │   │   └── requirements.txt
│   │   ├── fsner
│   │   │   ├── requirements.txt
│   │   │   ├── src/fsner/__init__.py
│   │   │   ├── pyproject.toml
│   │   │   └── setup.py
│   │   ├── information-gain-filtration
│   │   │   ├── igf/__init__.py
│   │   │   ├── requirements.txt
│   │   │   └── result_igf.png
│   │   ├── onnx/summarization/requirements.txt
│   │   ├── adversarial/requirements.txt
│   │   ├── bertology/requirements.txt
│   │   ├── tapex/requirements.txt
│   │   ├── layoutlmv3/requirements.txt
│   │   ├── longform-qa
│   │   │   ├── requirements.txt
│   │   │   └── README.md
│   │   ├── mlm_wwm/requirements.txt
│   │   ├── xtreme-s/requirements.txt
│   │   ├── rag
│   │   │   ├── __init__.py
│   │   │   ├── requirements.txt
│   │   │   └── finetune_rag.sh
│   │   ├── pplm
│   │   │   ├── imgs
│   │   │   │   ├── wooly.png
│   │   │   │   └── headfigure.png
│   │   │   ├── requirements.txt
│   │   │   └── pplm_classification_head.py
│   │   ├── wav2vec2
│   │   │   ├── requirements.txt
│   │   │   ├── run_alignment.sh
│   │   │   ├── finetune_base_100.sh
│   │   │   ├── finetune_large_lv60_100.sh
│   │   │   ├── finetune_base_timit_asr.sh
│   │   │   ├── finetune_large_lv60_timit_asr.sh
│   │   │   ├── finetune_wav2vec2_xlsr_turkish.sh
│   │   │   ├── finetune_large_xlsr_53_arabic_speech_corpus.sh
│   │   │   └── vocab/buckwalter.json
│   │   ├── distillation
│   │   │   ├── requirements.txt
│   │   │   └── training_configs
│   │   │       ├── distilgpt2.json
│   │   │       ├── distilbert-base-cased.json
│   │   │       ├── distilbert-base-uncased.json
│   │   │       ├── distilbert-base-multilingual-cased.json
│   │   │       └── distilroberta-base.json
│   │   ├── jax-projects
│   │   │   ├── big_bird
│   │   │   │   ├── requirements.txt
│   │   │   │   └── sweep_flax.yaml
│   │   │   └── hybrid_clip/requirements.txt
│   │   ├── movement-pruning
│   │   │   ├── emmental
│   │   │   │   ├── modules/__init__.py
│   │   │   │   └── __init__.py
│   │   │   └── requirements.txt
│   │   ├── self-training-text-classification/requirements.txt
│   │   ├── rag-end2end-retriever
│   │   │   ├── requirements.txt
│   │   │   └── test_run
│   │   │       ├── dummy-train-data
│   │   │       │   ├── test.target
│   │   │       │   ├── val.target
│   │   │       │   ├── val.source
│   │   │       │   └── test.source
│   │   │       └── test_rag_new_features.sh
│   │   ├── lxmert/README.md
│   │   ├── performer
│   │   │   ├── full_script.sh
│   │   │   └── sanity_script.sh
│   │   ├── seq2seq-distillation
│   │   │   ├── requirements.txt
│   │   │   ├── finetune.sh
│   │   │   ├── finetune_t5.sh
│   │   │   ├── finetune_pegasus_xsum.sh
│   │   │   ├── train_mbart_cc25_enro.sh
│   │   │   ├── dynamic_bs_example.sh
│   │   │   ├── sentence_splitter.py
│   │   │   ├── distil_marian_no_teacher.sh
│   │   │   ├── train_distilbart_cnn.sh
│   │   │   ├── distil_marian_enro_teacher.sh
│   │   │   ├── train_distilbart_xsum.sh
│   │   │   └── finetune_bart_tiny.sh
│   │   ├── visual_bert/README.md
│   │   ├── vqgan-clip
│   │   │   ├── requirements.txt
│   │   │   └── utils.py
│   │   ├── mm-imdb/README.md
│   │   └── README.md
│   ├── pytorch
│   │   ├── benchmarking/requirements.txt
│   │   ├── image-pretraining/requirements.txt
│   │   ├── contrastive-image-text/requirements.txt
│   │   ├── audio-classification/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── text-generation/requirements.txt
│   │   ├── token-classification
│   │   │   ├── requirements.txt
│   │   │   ├── run.sh
│   │   │   └── run_no_trainer.sh
│   │   ├── speech-pretraining/requirements.txt
│   │   ├── speech-recognition/requirements.txt
│   │   ├── image-classification/requirements.txt
│   │   ├── multiple-choice
│   │   │   ├── requirements.txt
│   │   │   └── run_no_trainer.sh
│   │   ├── semantic-segmentation/requirements.txt
│   │   ├── language-modeling/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   ├── translation/requirements.txt
│   │   ├── summarization/requirements.txt
│   │   └── _tests_requirements.txt
│   ├── tensorflow
│   │   ├── benchmarking/requirements.txt
│   │   ├── contrastive-image-text/requirements.txt
│   │   ├── language-modeling/requirements.txt
│   │   ├── image-classification/requirements.txt
│   │   ├── summarization/requirements.txt
│   │   ├── translation/requirements.txt
│   │   ├── multiple-choice/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── token-classification/requirements.txt
│   │   ├── language-modeling-tpu/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   └── _tests_requirements.txt
│   ├── flax
│   │   ├── language-modeling/requirements.txt
│   │   ├── question-answering/requirements.txt
│   │   ├── text-classification/requirements.txt
│   │   ├── token-classification/requirements.txt
│   │   ├── _tests_requirements.txt
│   │   ├── summarization/requirements.txt
│   │   └── vision/requirements.txt
│   └── legacy
│       ├── seq2seq
│       │   ├── __init__.py
│       │   ├── test_data
│       │   │   ├── wmt_en_ro
│       │   │   │   ├── val.len
│       │   │   │   └── train.len
│       │   │   └── fsmt/build-eval-data.py
│       │   ├── requirements.txt
│       │   ├── finetune.sh
│       │   ├── finetune_tpu.sh
│       │   ├── minify_dataset.py
│       │   └── rouge_cli.py
│       ├── pytorch-lightning
│       │   ├── requirements.txt
│       │   ├── run_glue.sh
│       │   └── run_pos.sh
│       ├── README.md
│       └── token-classification
│           ├── run_pos.sh
│           ├── scripts/preprocess.py
│           └── run_chunk.sh
├── .gitattributes
├── .github
│   ├── conda
│   │   ├── build.sh
│   │   └── meta.yaml
│   ├── workflows
│   │   ├── delete_doc_comment_trigger.yml
│   │   ├── delete_doc_comment.yml
│   │   ├── TROUBLESHOOT.md
│   │   ├── upload_pr_documentation.yml
│   │   ├── build_pr_documentation.yml
│   │   ├── build_documentation.yml
│   │   ├── stale.yml
│   │   ├── update_metdata.yml
│   │   └── release-conda.yml
│   └── ISSUE_TEMPLATE
│       ├── config.yml
│       ├── new-model-addition.yml
│       └── feature-request.yml
├── setup.cfg
├── templates
│   ├── adding_a_new_model
│   │   ├── open_model_proposals/README.md
│   │   ├── tests
│   │   │   ├── pt-encoder-bert-tokenizer.json
│   │   │   ├── tf-encoder-bert-tokenizer.json
│   │   │   ├── encoder-bert-tokenizer.json
│   │   │   ├── flax-encoder-bert-tokenizer.json
│   │   │   ├── standalone.json
│   │   │   ├── pt-seq-2-seq-bart-tokenizer.json
│   │   │   ├── flax-seq-2-seq-bart-tokenizer.json
│   │   │   └── tf-seq-2-seq-bart-tokenizer.json
│   │   ├── cookiecutter.json
│   │   └── cookiecutter-template-{{cookiecutter.modelname}}/configuration.json
│   ├── adding_a_new_example_script/cookiecutter.json
│   └── adding_a_missing_tokenization_test/cookiecutter.json
├── .coveragerc
├── docker
│   ├── transformers-pytorch-tpu
│   │   ├── docker-entrypoint.sh
│   │   ├── dataset.yaml
│   │   └── bert-base-cased.jsonnet
│   ├── transformers-pytorch-cpu/Dockerfile
│   ├── transformers-tensorflow-cpu/Dockerfile
│   ├── transformers-cpu/Dockerfile
│   ├── transformers-gpu/Dockerfile
│   ├── transformers-doc-builder/Dockerfile
│   └── transformers-tensorflow-gpu/Dockerfile
├── .circleci/TROUBLESHOOT.md
├── scripts
│   ├── tatoeba/upload_models.sh
│   └── fsmt/tests-to-run.sh
├── pyproject.toml
└── model_cards/README.md

--------------------------------------------------------------------------------

The rest of the dump reproduces the contents of the files listed above. Every file shown is empty — the `__init__.py` package markers under `tests/`, `utils/test_module/`, `src/transformers/` and `examples/research_projects/`, as well as `tests/fixtures/empty.txt`, `docs/source/en/notebooks.md` and `docs/source/en/contributing.md` — with a single exception:

/examples/pytorch/benchmarking/requirements.txt:
--------------------------------------------------------------------------------
1 | torch >= 1.3
--------------------------------------------------------------------------------
-------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/pabee/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.9.2 -------------------------------------------------------------------------------- /examples/tensorflow/benchmarking/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 2.3 -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/igf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py eol=lf 2 | *.rst eol=lf 3 | *.md eol=lf 4 | *.mdx eol=lf -------------------------------------------------------------------------------- /examples/research_projects/deebert/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/onnx/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | torch >= 1.10 -------------------------------------------------------------------------------- /tests/fixtures/merges.txt: -------------------------------------------------------------------------------- 1 | #version: 0.2 2 | Ġ l 3 | Ġl o 4 | Ġlo w 5 | e r 6 | -------------------------------------------------------------------------------- /examples/research_projects/adversarial/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /examples/research_projects/bertology/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /examples/research_projects/bert-loses-patience/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 -------------------------------------------------------------------------------- /examples/research_projects/tapex/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | datasets 3 | pandas 4 | nltk -------------------------------------------------------------------------------- /examples/research_projects/layoutlmv3/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | seqeval 3 | pillow 4 | -------------------------------------------------------------------------------- /.github/conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install # Python command to install the script. 2 | -------------------------------------------------------------------------------- /examples/tensorflow/contrastive-image-text/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.6.0 2 | datasets>=1.8.0 -------------------------------------------------------------------------------- /examples/tensorflow/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 -------------------------------------------------------------------------------- /examples/pytorch/image-pretraining/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.8.0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS 3 | doctest_glob=**/*.md -------------------------------------------------------------------------------- /examples/pytorch/contrastive-image-text/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.8.0 -------------------------------------------------------------------------------- /examples/tensorflow/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=1.17.0 2 | evaluate 3 | tensorflow>=2.4 4 | -------------------------------------------------------------------------------- /examples/tensorflow/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/tensorflow/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /tests/fixtures/test_entity_vocab.json: -------------------------------------------------------------------------------- 1 | {"[MASK]": 0, "[UNK]": 1, "[PAD]": 2, "DUMMY": 3, "DUMMY2": 4, 
"[MASK2]": 5} -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | cache* 2 | temp* 3 | !*.txt 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /examples/tensorflow/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece != 0.1.92 2 | protobuf 3 | tensorflow >= 2.3 4 | -------------------------------------------------------------------------------- /examples/tensorflow/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/research_projects/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers == 3.5.1 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | faiss-cpu 3 | streamlit 4 | elasticsearch 5 | -------------------------------------------------------------------------------- /examples/research_projects/mlm_wwm/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | ltp 5 | -------------------------------------------------------------------------------- /examples/tensorflow/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.4.0 2 | tensorflow >= 2.3.0 3 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/pytorch/audio-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=1.14.0 2 | evaluate 3 | librosa 4 | torchaudio 5 | torch>=1.6 -------------------------------------------------------------------------------- /examples/pytorch/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | torch >= 1.3.0 4 | evaluate -------------------------------------------------------------------------------- /examples/tensorflow/language-modeling-tpu/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.26.1 2 | datasets==2.9.0 3 | tokenizers==0.13.2 4 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /examples/pytorch/text-generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.21.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | torch >= 1.3 5 | 
-------------------------------------------------------------------------------- /examples/research_projects/xtreme-s/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | torch >= 1.5 3 | torchaudio 4 | librosa 5 | jiwer 6 | -------------------------------------------------------------------------------- /tests/deepspeed/vit_feature_extractor.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "ViTFeatureExtractor", 3 | "size": 30 4 | } 5 | -------------------------------------------------------------------------------- /examples/flax/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.9 6 | -------------------------------------------------------------------------------- /examples/flax/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | jax>=0.2.17 3 | jaxlib>=0.1.68 4 | flax>=0.3.5 5 | optax>=0.0.8 -------------------------------------------------------------------------------- /examples/pytorch/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | seqeval 3 | datasets >= 1.8.0 4 | torch >= 1.3 5 | evaluate -------------------------------------------------------------------------------- /examples/flax/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /examples/pytorch/speech-pretraining/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.12.0 2 | torch >= 1.5 3 | torchaudio 4 | accelerate >= 0.12.0 5 | librosa -------------------------------------------------------------------------------- /examples/pytorch/speech-recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | torch >= 1.5 3 | torchaudio 4 | librosa 5 | jiwer 6 | evaluate 7 | -------------------------------------------------------------------------------- /examples/flax/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | seqeval -------------------------------------------------------------------------------- /examples/pytorch/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.12.0 2 | torch>=1.5.0 3 | torchvision>=0.6.0 4 | datasets>=1.17.0 5 | evaluate -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/requirements.txt: 
-------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | torch >= 1.3 5 | evaluate 6 | -------------------------------------------------------------------------------- /utils/test_module/custom_tokenization.py: -------------------------------------------------------------------------------- 1 | from transformers import BertTokenizer 2 | 3 | 4 | class CustomTokenizer(BertTokenizer): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/flax/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | pytest 3 | conllu 4 | nltk 5 | rouge-score 6 | seqeval 7 | tensorboard 8 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /examples/flax/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | jax>=0.2.8 3 | jaxlib>=0.1.59 4 | flax>=0.3.5 5 | optax>=0.0.8 6 | evaluate>=0.2.0 7 | -------------------------------------------------------------------------------- /examples/pytorch/semantic-segmentation/requirements.txt: -------------------------------------------------------------------------------- 1 | git://github.com/huggingface/accelerate.git 2 | datasets >= 2.0.0 3 | torch >= 1.3 4 | evaluate -------------------------------------------------------------------------------- /examples/research_projects/rag/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(1, os.path.dirname(os.path.realpath(__file__))) 6 | -------------------------------------------------------------------------------- /tests/fixtures/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor", 3 | "processor_class": "Wav2Vec2Processor" 4 | } -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /docs/source/zh/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers简介 4 | - local: quicktour 5 | title: 快速上手 6 | title: 开始使用 -------------------------------------------------------------------------------- /examples/tensorflow/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.1.3 2 | sentencepiece != 0.1.92 3 | protobuf 4 | tensorflow >= 2.3 5 | evaluate >= 0.2.0 -------------------------------------------------------------------------------- /tests/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def is_sagemaker_available(): 5 | return importlib.util.find_spec("sagemaker") is not None 6 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe.model: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/test_sentencepiece_bpe.model -------------------------------------------------------------------------------- /tests/fixtures/vocab.txt: -------------------------------------------------------------------------------- 1 | [PAD] 2 | [SEP] 3 | [MASK] 4 | [CLS] 5 | [unused3] 6 | [unused4] 7 | [unused5] 8 | [unused6] 9 | [unused7] 10 | [unused8] 11 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/examples/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==2.3.2 2 | transformers==4.21.1 3 | wandb==0.13.1 4 | evaluate==0.2.2 5 | scikit-learn==1.1.2 -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy>=1.17.2 3 | joblib>=0.13.2 4 | scipy 5 | torch>=1.10.1 6 | transformers>=3.5 -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/examples/research_projects/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /tests/fixtures/dummy_feature_extractor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_extractor_type": "Wav2Vec2FeatureExtractor", 3 | "processor_class": "Wav2Vec2Processor" 4 | } 5 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_no_bos.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/test_sentencepiece_no_bos.model -------------------------------------------------------------------------------- /utils/test_module/custom_image_processing.py: -------------------------------------------------------------------------------- 1 | from transformers import CLIPImageProcessor 2 | 3 | 4 | class CustomImageProcessor(CLIPImageProcessor): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | torch>=1.5.0 4 | torchaudio 5 | jiwer==2.2.0 6 | lang-trans==0.6.0 7 | librosa==0.8.0 8 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_bpe_char.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/test_sentencepiece_bpe_char.model -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/tests_samples/COCO/000000039769.png 
-------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/val.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/examples/legacy/seq2seq/test_data/wmt_en_ro/val.len -------------------------------------------------------------------------------- /examples/research_projects/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/examples/research_projects/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /templates/adding_a_new_model/open_model_proposals/README.md: -------------------------------------------------------------------------------- 1 | Currently the following model proposals are available: 2 | 3 | - [BigBird (Google)](./ADD_BIG_BIRD.md) 4 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/wmt_en_ro/train.len: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/examples/legacy/seq2seq/test_data/wmt_en_ro/train.len -------------------------------------------------------------------------------- /examples/research_projects/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.1.32 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy>=1.4.1 8 | -------------------------------------------------------------------------------- /utils/test_module/custom_feature_extraction.py: -------------------------------------------------------------------------------- 1 | from transformers import Wav2Vec2FeatureExtractor 2 | 3 | 4 | class CustomFeatureExtractor(Wav2Vec2FeatureExtractor): 5 | pass 6 | -------------------------------------------------------------------------------- /examples/pytorch/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | torch >= 1.3 3 | datasets >= 1.8.0 4 | sentencepiece != 0.1.92 5 | protobuf 6 | evaluate 7 | scikit-learn 8 | -------------------------------------------------------------------------------- /examples/research_projects/jax-projects/big_bird/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers@main 2 | datasets 3 | sentencepiece 4 | wandb 5 | flax 6 | jsonlines 7 | -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece_with_bytefallback.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/test_sentencepiece_with_bytefallback.model -------------------------------------------------------------------------------- /examples/pytorch/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | scipy 5 | scikit-learn 6 | protobuf 7 | torch >= 1.3 8 | evaluate 
-------------------------------------------------------------------------------- /examples/pytorch/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | sacrebleu >= 1.4.12 6 | py7zr 7 | torch >= 1.3 8 | evaluate -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer 2 | from .masked_nn import MaskedLinear 3 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/tensorflow/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@main # install main or adjust it with vX.X.X for installing version specific transforms -------------------------------------------------------------------------------- /examples/research_projects/fsner/src/fsner/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import FSNERModel 2 | from .tokenizer_utils import FSNERTokenizerUtils 3 | 4 | 5 | __all__ = ["FSNERModel", "FSNERTokenizerUtils"] 6 | -------------------------------------------------------------------------------- /examples/research_projects/self-training-text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | protobuf 4 | scikit-learn 5 | scipy 6 | sentencepiece != 0.1.92 7 | torch >= 1.3 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/tests/fixtures/tests_samples/COCO/coco_panoptic/000000039769.png -------------------------------------------------------------------------------- /utils/slow_documentation_tests.txt: -------------------------------------------------------------------------------- 1 | docs/source/en/generation_strategies.md 2 | docs/source/en/model_doc/ctrl.md 3 | docs/source/en/task_summary.md 4 | src/transformers/models/ctrl/modeling_ctrl.py 5 | -------------------------------------------------------------------------------- /tests/sagemaker/scripts/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/transformers.git@main # install main or adjust it with vX.X.X for installing version specific transforms 2 | datasets==1.8.0 -------------------------------------------------------------------------------- /examples/pytorch/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate >= 0.12.0 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | protobuf 5 | rouge-score 6 | nltk 7 | py7zr 8 | torch >= 1.3 9 | evaluate 10 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires
= [ 3 | "setuptools>=57.4.0", 4 | "wheel>=0.37.0", 5 | "transformers>=4.9.2" 6 | ] 7 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /examples/research_projects/information-gain-filtration/result_igf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArtificialZeng/transformers-Explained/HEAD/examples/research_projects/information-gain-filtration/result_igf.png -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.7.2 2 | datasets 3 | psutil >= 5.9.1 4 | torch >= 1.11.0 5 | pytorch-lightning == 1.6.4 6 | nvidia-ml-py3 == 7.352.0 7 | ray >= 1.13.0 -------------------------------------------------------------------------------- /examples/research_projects/rag/requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu >= 1.6.3 2 | datasets >= 1.0.1 3 | psutil >= 5.7.0 4 | torch >= 1.4.0 5 | ray >= 1.10.0 6 | pytorch-lightning >= 1.5.10, <=1.6.0 7 | transformers 8 | GitPython -------------------------------------------------------------------------------- /utils/test_module/custom_processing.py: -------------------------------------------------------------------------------- 1 | from transformers import ProcessorMixin 2 | 3 | 4 | class CustomProcessor(ProcessorMixin): 5 | feature_extractor_class = "AutoFeatureExtractor" 6 | tokenizer_class = "AutoTokenizer" 7 | -------------------------------------------------------------------------------- /docs/source/fr/in_translation.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | # Traduction en cours. -------------------------------------------------------------------------------- /docs/source/ko/in_translation.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | # 열심히 번역 중입니다. 조금 이따 만나요! -------------------------------------------------------------------------------- /examples/research_projects/lxmert/README.md: -------------------------------------------------------------------------------- 1 | # LXMERT DEMO 2 | 3 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 4 | 2. install reqs: ``pip install -r ./requirements.txt`` 5 | 3. 
usage is as shown in demo.ipynb 6 | -------------------------------------------------------------------------------- /tests/fixtures/vocab.json: -------------------------------------------------------------------------------- 1 | {"l": 0, "o": 1, "w": 2, "e": 3, "r": 4, "s": 5, "t": 6, "i": 7, "d": 8, "n": 9, "Ġ": 10, "Ġl": 11, "Ġn": 12, "Ġlo": 13, "Ġlow": 14, "er": 15, "Ġlowest": 16, "Ġnewer": 17, "Ġwider": 18, "": 19, "<|endoftext|>": 20} 2 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_embd": 768, 5 | "n_head": 12, 6 | "n_layer": 6, 7 | "n_positions": 1024, 8 | "vocab_size": 50257 9 | } -------------------------------------------------------------------------------- /examples/flax/vision/requirements.txt: -------------------------------------------------------------------------------- 1 | jax>=0.2.8 2 | jaxlib>=0.1.59 3 | flax>=0.3.5 4 | optax>=0.0.8 5 | -f https://download.pytorch.org/whl/torch_stable.html 6 | torch==1.11.0+cpu 7 | -f https://download.pytorch.org/whl/torch_stable.html 8 | torchvision==0.12.0+cpu 9 | -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /utils/test_module/custom_tokenization_fast.py: -------------------------------------------------------------------------------- 1 | from transformers import BertTokenizerFast 2 | 3 | from .custom_tokenization import CustomTokenizer 4 | 5 | 6 | class CustomTokenizerFast(BertTokenizerFast): 7 | slow_tokenizer_class = CustomTokenizer 8 | pass 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/run_alignment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python alignment.py \ 3 | --model_name="arijitx/wav2vec2-xls-r-300m-bengali" \ 4 | --wav_dir="./wavs" \ 5 | --text_file="script.txt" \ 6 | --input_wavs_sr=48000 \ 7 | --output_dir="./out_alignment" \ 8 | --cuda 9 | -------------------------------------------------------------------------------- /examples/research_projects/jax-projects/hybrid_clip/requirements.txt: -------------------------------------------------------------------------------- 1 | jax>=0.2.8 2 | jaxlib>=0.1.59 3 | flax>=0.3.5 4 | optax>=0.0.8 5 | -f https://download.pytorch.org/whl/torch_stable.html 6 | torch==1.9.0+cpu 7 | -f https://download.pytorch.org/whl/torch_stable.html 8 | torchvision==0.10.0+cpu
-------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/test.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons 9 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.target: -------------------------------------------------------------------------------- 1 | to a snake 2 | Moses' assistant 3 | Egyptian royal court 4 | let his rod turn in to a snake 5 | The Pokémon Company 6 | Nintendo 7 | world's top-selling toy brand, the top-selling trading card game 8 | over 20 seasons -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.19.0 2 | datasets==1.16.0 3 | wandb==0.12.0 4 | tensorboard==2.6.0 5 | torch==1.11.0 6 | huggingface-hub==0.1.0 7 | git+https://github.com/huggingface/accelerate.git@3c45b6f760ad8745be9ebc9bbb26f5b04dea4abe 8 | datasketch==1.5.7 9 | dpu_utils -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ~/.bashrc 3 | echo "running docker-entrypoint.sh" 4 | conda activate container 5 | echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS 6 | echo "printed TPU info" 7 | export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" 8 | exec "$@" 9 | -------------------------------------------------------------------------------- /src/transformers/utils/constants.py: -------------------------------------------------------------------------------- 1 | IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] 2 | IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225] 3 | IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5] 4 | IMAGENET_STANDARD_STD = [0.5, 0.5, 0.5] 5 | OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073] 6 | OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711] 7 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment_trigger.yml: -------------------------------------------------------------------------------- 1 | name: Delete doc comment trigger 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | -------------------------------------------------------------------------------- /src/transformers/kernels/yoso/common_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | #define MAX_THREADS_PER_BLOCK 1024 3 | #define OPTIMAL_THREADS_PER_BLOCK 256 4 | #define WARP_SIZE 32 5 | #define MAX_NUM_BLOCK_X 2147483647 6 | #define MAX_NUM_BLOCK_Y 65535 7 | #define MAX_NUM_BLOCK_Z 65535 8 | #define MAX_SHARED_MEM_PER_BLOCK 48000 9 | #define FULL_MASK 0xffffffff 10 | -------------------------------------------------------------------------------- /src/transformers/kernels/yoso/common.h:
-------------------------------------------------------------------------------- 1 | 2 | #define min(a, b) ((a)<(b)?(a):(b)) 3 | #define max(a, b) ((a)>(b)?(a):(b)) 4 | #define ceil_divide(a, b) ((a)/(b)+((a)%(b)!=0)) 5 | #define select(cond, a, b) ((cond)?(a):(b)) 6 | #define PI 3.141592 7 | #define EPSILON 1e-8 8 | #define MAX_VAL 1e12 9 | #define MIN_VAL -1e12 10 | #define EMPTY_VALUE -1 11 | -------------------------------------------------------------------------------- /docs/source/ja/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers 4 | - local: installation 5 | title: インストール 6 | title: はじめに 7 | - sections: 8 | - local: accelerate 9 | title: 🤗 Accelerate を用いた分散学習 10 | title: チュートリアル 11 | - sections: 12 | - sections: 13 | - local: multilingual 14 | title: 推論のための多言語モデル -------------------------------------------------------------------------------- /examples/legacy/seq2seq/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/val.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ? -------------------------------------------------------------------------------- /.circleci/TROUBLESHOOT.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | This is a document explaining how to deal with various issues on Circle-CI. The entries may include actual solutions or pointers to Issues that cover those. 4 | 5 | ## Circle CI 6 | 7 | * pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408 8 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/dummy-train-data/test.source: -------------------------------------------------------------------------------- 1 | What does Moses' rod turn into ? 2 | Who is Aron? 3 | Where did Moses grow up ? 4 | What happens at the command of the Moses ? 5 | Who manages the Pokémon ? 6 | Who owned the Pokémon trademark ? 7 | What else include in Pokémon franchise ? 8 | How many seasons in Pokémon animme series ?
9 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | ray 22 | -------------------------------------------------------------------------------- /scripts/tatoeba/upload_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for FILE in converted/*; do 4 | model_name=`basename $FILE` 5 | huggingface-cli repo create $model_name -y 6 | git clone https://huggingface.co/Helsinki-NLP/$model_name 7 | mv $FILE/* $model_name/ 8 | cd $model_name 9 | git add . && git commit -m "initial commit" 10 | git push 11 | cd .. 12 | done 13 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_keras_nlp_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class TFGPT2Tokenizer(metaclass=DummyObject): 6 | _backends = ["keras_nlp"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["keras_nlp"]) 10 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete doc comment 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Delete doc comment trigger"] 6 | types: 7 | - completed 8 | 9 | 10 | jobs: 11 | delete: 12 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 13 | secrets: 14 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /examples/research_projects/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_bert_masked import MaskedBertConfig 2 | from .modeling_bert_masked import ( 3 | MaskedBertForMultipleChoice, 4 | MaskedBertForQuestionAnswering, 5 | MaskedBertForSequenceClassification, 6 | MaskedBertForTokenClassification, 7 | MaskedBertModel, 8 | ) 9 | from .modules import * 10 | -------------------------------------------------------------------------------- /examples/research_projects/performer/full_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia 
--dataset_config_name 20200501.en --model_name_or_path bert-large-cased --tokenizer_name bert-large-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | psutil 4 | sacrebleu 5 | rouge-score 6 | tensorflow_datasets 7 | pytorch-lightning 8 | matplotlib 9 | git-python==1.0.3 10 | faiss-cpu 11 | streamlit 12 | elasticsearch 13 | nltk 14 | pandas 15 | datasets >= 1.1.3 16 | fire 17 | pytest 18 | conllu 19 | sentencepiece != 0.1.92 20 | protobuf 21 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | SLOW_TO_FAST_CONVERTERS = None 6 | 7 | 8 | def convert_slow_tokenizer(*args, **kwargs): 9 | requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"]) 10 | -------------------------------------------------------------------------------- /examples/research_projects/performer/sanity_script.sh: -------------------------------------------------------------------------------- 1 | TOKENIZERS_PARALLELISM=true python run_mlm_performer.py --output_dir experiments --dataset_name wikipedia --dataset_config_name 20200501.simple --model_name_or_path bert-base-cased --tokenizer_name bert-base-cased --do_train --overwrite_output_dir --per_device_train_batch_size 4 --learning_rate 5e-4 --warmup_steps 100 --num_train_epochs 3 --performer -------------------------------------------------------------------------------- /src/transformers/utils/dummy_tensorflow_text_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class TFBertTokenizer(metaclass=DummyObject): 6 | _backends = ["tensorflow_text"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["tensorflow_text"]) 10 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_name": "text classification", 3 | "directory_name": "{{cookiecutter.example_name|lower|replace(' ', '-')}}", 4 | "example_shortcut": "{{cookiecutter.directory_name}}", 5 | "model_class": "AutoModel", 6 | "authors": "The HuggingFace Team", 7 | "can_train_from_scratch": ["True", "False"], 8 | "with_trainer": ["True", "False"] 9 | } -------------------------------------------------------------------------------- /examples/research_projects/pplm/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.1.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | transformers==3.5.1 23 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /templates/adding_a_missing_tokenization_test/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | 
"has_slow_class": ["True", "False"], 7 | "has_fast_class": ["True", "False"], 8 | "slow_tokenizer_use_sentencepiece": ["True", "False"], 9 | "authors": "The HuggingFace Team" 10 | } 11 | -------------------------------------------------------------------------------- /examples/tensorflow/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow<2.15 2 | tensorboard 3 | scikit-learn 4 | seqeval 5 | psutil 6 | sacrebleu >= 1.4.12 7 | rouge-score 8 | tensorflow_datasets 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.13.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | jiwer 23 | librosa 24 | evaluate >= 0.2.0 25 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune.sh: -------------------------------------------------------------------------------- 1 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 2 | # run ./finetune.sh --help to see all the possible options 3 | python finetune.py \ 4 | --learning_rate=3e-5 \ 5 | --fp16 \ 6 | --gpus 1 \ 7 | --do_train \ 8 | --do_predict \ 9 | --n_val 1000 \ 10 | --val_check_interval 0.1 \ 11 | "$@" 12 | -------------------------------------------------------------------------------- /.github/workflows/TROUBLESHOOT.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | This is a document explaining how to deal with various issues on github-actions self-hosted CI. The entries may include actually solutions or pointers to Issues that cover those. 4 | 5 | ## GitHub Actions (self-hosted CI) 6 | 7 | * Deepspeed 8 | 9 | - if jit build hangs, clear out `rm -rf ~/.cache/torch_extensions/` reference: https://github.com/huggingface/transformers/pull/12723 10 | -------------------------------------------------------------------------------- /examples/research_projects/visual_bert/README.md: -------------------------------------------------------------------------------- 1 | # VisualBERT Demo 2 | 3 | This demo shows usage of VisualBERT VQA model and is adapted from LXMERT demo present [here](https://github.com/huggingface/transformers/blob/main/examples/research_projects/lxmert/demo.ipynb). 4 | 1. make a virtualenv: ``virtualenv venv`` and activate ``source venv/bin/activate`` 5 | 2. install reqs: ``pip install -r ./requirements.txt`` 6 | 3. 
usage is as shown in demo.ipynb 7 | -------------------------------------------------------------------------------- /examples/research_projects/vqgan-clip/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | gradio 3 | icecream 4 | imageio 5 | lpips 6 | matplotlib 7 | more_itertools 8 | numpy 9 | omegaconf 10 | opencv_python_headless 11 | Pillow 12 | pudb 13 | pytorch_lightning 14 | PyYAML 15 | requests 16 | scikit_image 17 | scipy 18 | setuptools 19 | streamlit 20 | taming-transformers 21 | torch 22 | torchvision 23 | tqdm 24 | transformers==4.26.0 25 | tokenizers==0.13.2 26 | typing_extensions 27 | wandb 28 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplatePT", 3 | "uppercase_modelname": "TEMPLATE_PT", 4 | "lowercase_modelname": "template_pt", 5 | "camelcase_modelname": "TemplatePt", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateTF", 3 | "uppercase_modelname": "TEMPLATE_TF", 4 | "lowercase_modelname": "template_tf", 5 | "camelcase_modelname": "TemplateTf", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "TensorFlow", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "Template", 3 | "uppercase_modelname": "TEMPLATE", 4 | "lowercase_modelname": "template", 5 | "camelcase_modelname": "Template", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/flax-encoder-bert-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateFLAX", 3 | "uppercase_modelname": "TEMPLATE_FLAX", 4 | "lowercase_modelname": "template_flax", 5 | "camelcase_modelname": "TemplateFlax", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": "Based on BERT", 9 | "generate_tensorflow_pytorch_and_flax": "Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/standalone.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "TemplateBI", 3 | "uppercase_modelname": "TEMPLATE_BI", 4 | 
"lowercase_modelname": "template_bi", 5 | "camelcase_modelname": "TemplateBi", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "bi-brand-new-bert-base-cased", 8 | "tokenizer_type": "Standalone", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch, TensorFlow and Flax", 10 | "is_encoder_decoder_model": "False" 11 | } 12 | -------------------------------------------------------------------------------- /utils/test_module/custom_configuration.py: -------------------------------------------------------------------------------- 1 | from transformers import PretrainedConfig 2 | 3 | 4 | class CustomConfig(PretrainedConfig): 5 | model_type = "custom" 6 | 7 | def __init__(self, attribute=1, **kwargs): 8 | self.attribute = attribute 9 | super().__init__(**kwargs) 10 | 11 | 12 | class NoSuperInitConfig(PretrainedConfig): 13 | model_type = "custom" 14 | 15 | def __init__(self, attribute=1, **kwargs): 16 | self.attribute = attribute 17 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: transformers 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_t5.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --data_dir=$CNN_DIR \ 6 | --learning_rate=3e-5 \ 7 | --train_batch_size=$BS \ 8 | --eval_batch_size=$BS \ 9 | --output_dir=$OUTPUT_DIR \ 10 | --max_source_length=512 \ 11 | --max_target_length=56 \ 12 | --val_check_interval=0.1 --n_val=200 \ 13 | --do_train --do_predict \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "PTNewENCDEC", 3 | "uppercase_modelname": "PT_NEW_ENC_DEC", 4 | "lowercase_modelname": "pt_new_enc_dec_template", 5 | "camelcase_modelname": "PtNewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "pt-new-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "PyTorch", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /examples/research_projects/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } 
-------------------------------------------------------------------------------- /examples/research_projects/jax-projects/big_bird/sweep_flax.yaml: -------------------------------------------------------------------------------- 1 | command: 2 | - python3 3 | - train.py 4 | method: random 5 | parameters: 6 | lr: 7 | values: [4e-5, 3e-5] 8 | warmup_steps: 9 | values: [20000, 15000, 10000, 5000] 10 | weight_decay: 11 | distribution: normal 12 | mu: 1e-2 13 | sigma: 2e-3 14 | metric: 15 | name: eval_loss 16 | goal: minimize 17 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_detectron2_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import requires_backends 3 | 4 | 5 | LAYOUTLM_V2_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class LayoutLMv2Model: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["detectron2"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["detectron2"]) 15 | -------------------------------------------------------------------------------- /examples/pytorch/_tests_requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu >= 1.4.12 6 | git+https://github.com/huggingface/accelerate@main#egg=accelerate 7 | rouge-score 8 | tensorflow_datasets 9 | matplotlib 10 | git-python==1.0.3 11 | faiss-cpu 12 | streamlit 13 | elasticsearch 14 | nltk 15 | pandas 16 | datasets >= 1.13.3 17 | fire 18 | pytest 19 | conllu 20 | sentencepiece != 0.1.92 21 | protobuf 22 | torchvision 23 | jiwer 24 | librosa 25 | evaluate >= 0.2.0 26 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/flax-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "FlaxNewENCDEC", 3 | "uppercase_modelname": "FLAX_NEW_ENC_DEC", 4 | "lowercase_modelname": "flax_new_enc_dec_template", 5 | "camelcase_modelname": "FlaxNewEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-flax-enc-dec-base", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "Flax", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "NewTFENCDEC", 3 | "uppercase_modelname": "NEW_TF_ENC_DEC", 4 | "lowercase_modelname": "new_tf_enc_dec_template", 5 | "camelcase_modelname": "NewTFEncDec", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "new-tf-enc-dec-base_template", 8 | "tokenizer_type": "Based on BART", 9 | "generate_tensorflow_pytorch_and_flax": "TensorFlow", 10 | "is_encoder_decoder_model": "True" 11 | } 12 | -------------------------------------------------------------------------------- /examples/research_projects/rag-end2end-retriever/test_run/test_rag_new_features.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="../":"${PYTHONPATH}" 2 | 3 | python use_own_knowledge_dataset.py 4 | 5 | ray start --head 6 | python finetune_rag.py \ 7 | 
--model_name_or_path facebook/rag-token-base \ 8 | --model_type rag_token \ 9 | --context_encoder_name facebook/dpr-ctx_encoder-multiset-base \ 10 | --fp16 \ 11 | --gpus 1 \ 12 | --profile \ 13 | --end2end \ 14 | --index_name custom 15 | 16 | ray stop 17 | -------------------------------------------------------------------------------- /src/transformers/models/esm/openfold_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .chunk_utils import chunk_layer 2 | from .data_transforms import make_atom14_masks 3 | from .feats import atom14_to_atom37, frames_and_literature_positions_to_atom14_pos, torsion_angles_to_frames 4 | from .loss import compute_predicted_aligned_error, compute_tm 5 | from .protein import Protein as OFProtein 6 | from .protein import to_pdb 7 | from .rigid_utils import Rigid, Rotation 8 | from .tensor_utils import dict_multimap, flatten_final_dims, permute_final_dims 9 | -------------------------------------------------------------------------------- /docs/source/ko/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers 설치 방법 4 | ! pip install transformers datasets 5 | # 마지막 릴리스 대신 소스에서 설치하려면, 위 명령을 주석으로 바꾸고 아래 명령을 해제하세요. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: transformers 17 | languages: de en es fr it ko pt zh 18 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_music_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class Pop2PianoFeatureExtractor(metaclass=DummyObject): 6 | _backends = ["music"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["music"]) 10 | 11 | 12 | class Pop2PianoTokenizer(metaclass=DummyObject): 13 | _backends = ["music"] 14 | 15 | def __init__(self, *args, **kwargs): 16 | requires_backends(self, ["music"]) 17 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class ASTFeatureExtractor(metaclass=DummyObject): 6 | _backends = ["speech"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["speech"]) 10 | 11 | 12 | class Speech2TextFeatureExtractor(metaclass=DummyObject): 13 | _backends = ["speech"] 14 | 15 | def __init__(self, *args, **kwargs): 16 | requires_backends(self, ["speech"]) 17 | -------------------------------------------------------------------------------- /docs/source/de/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/es/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/en/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/pt/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt: -------------------------------------------------------------------------------- 1 | [{"id": 8222595, "category_id": 17, "iscrowd": 0, "bbox": [18, 54, 301, 415], "area": 53306}, {"id": 8225432, "category_id": 17, "iscrowd": 0, "bbox": [349, 26, 291, 343], "area": 59627}, {"id": 8798150, "category_id": 63, "iscrowd": 0, "bbox": [1, 0, 639, 474], "area": 174579}, {"id": 14466198, "category_id": 75, "iscrowd": 0, "bbox": [42, 74, 133, 45], "area": 4068}, {"id": 12821912, "category_id": 75, "iscrowd": 0, "bbox": [333, 80, 38, 106], "area": 2118}, {"id": 10898909, "category_id": 93, "iscrowd": 0, "bbox": [0, 0, 640, 480], "area": 2750}] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Model checkpoints on the Hugging Face Hub 5 | url: https://huggingface.co/models 6 | about: Open a Pull request / Discussion related to a specific model checkpoint directly on the Hugging Face Hub 7 | - name: Website Related 8 | url: https://github.com/huggingface/hub-docs/issues 9 | about: Feature requests and bug reports related to the website 10 | - name: Forum 11 | url: https://discuss.huggingface.co/ 12 | about: General usage questions and community discussions 13 | -------------------------------------------------------------------------------- /docs/source/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Transformers installation 4 | ! pip install transformers datasets evaluate 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/fr/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation de Transformers 4 | ! pip install transformers datasets 5 | # Pour installer à partir du code source au lieu de la dernière version, commentez la commande ci-dessus et décommentez la suivante. 6 | # ! 
pip install git+https://github.com/huggingface/transformers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | - use_templates 10 | 11 | jobs: 12 | build: 13 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 14 | with: 15 | commit_sha: ${{ github.sha }} 16 | package: transformers 17 | notebook_folder: transformers_doc 18 | languages: de en es fr it ko pt zh 19 | secrets: 20 | token: ${{ secrets.HUGGINGFACE_PUSH }} 21 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 22 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_pegasus_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # From appendix C of paper https://arxiv.org/abs/1912.08777 5 | # Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16) 6 | python finetune.py \ 7 | --learning_rate=1e-4 \ 8 | --do_train \ 9 | --do_predict \ 10 | --n_val 1000 \ 11 | --val_check_interval 0.25 \ 12 | --max_source_length 512 --max_target_length 56 \ 13 | --freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \ 14 | "$@" 15 | -------------------------------------------------------------------------------- /tests/fixtures/add_distilbert_like_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_copied_from": true, 3 | "old_model_type": "distilbert", 4 | "new_model_patterns": { 5 | "model_name": "BERT New", 6 | "checkpoint": "huggingface/bert-new-base", 7 | "model_type": "bert-new", 8 | "model_lower_cased": "bert_new", 9 | "model_camel_cased": "BertNew", 10 | "model_upper_cased": "BERT_NEW", 11 | "config_class": "BertNewConfig", 12 | "tokenizer_class": "DistilBertTokenizer" 13 | }, 14 | "frameworks": [ 15 | "pt", 16 | "tf", 17 | "flax" 18 | ] 19 | } -------------------------------------------------------------------------------- /docs/source/it/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installazione di Transformers 4 | ! pip install transformers datasets 5 | # Per installare dalla fonte invece dell'ultima versione rilasciata, commenta il comando sopra e 6 | # rimuovi la modalità commento al comando seguente. 7 | # ! 
pip install git+https://github.com/huggingface/transformers.git 8 | """ 9 | 10 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 11 | black_avoid_patterns = { 12 | "{processor_class}": "FakeProcessorClass", 13 | "{model_class}": "FakeModelClass", 14 | "{object_class}": "FakeObjectClass", 15 | } 16 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 8 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/transformers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.8 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python scripts/stale.py 28 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "BrandNewBERT", 3 | "uppercase_modelname": "BRAND_NEW_BERT", 4 | "lowercase_modelname": "brand_new_bert", 5 | "camelcase_modelname": "BrandNewBert", 6 | "authors": "The HuggingFace Team", 7 | "checkpoint_identifier": "brand-new-bert-base-cased", 8 | "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"], 9 | "generate_tensorflow_pytorch_and_flax": [ 10 | "PyTorch, TensorFlow and Flax", 11 | "PyTorch & TensorFlow", 12 | "PyTorch & Flax", 13 | "TensorFlow & Flax", 14 | "PyTorch", 15 | "TensorFlow", 16 | "Flax" 17 | ], 18 | "is_encoder_decoder_model": ["True", "False"] 19 | } 20 | -------------------------------------------------------------------------------- /examples/research_projects/longform-qa/README.md: -------------------------------------------------------------------------------- 1 | # Long Form Question Answering 2 | 3 | Author: @yjernite 4 | 5 | This folder contains the code for the Long Form Question Answering [demo](http://35.226.96.115:8080/) as well as methods to train and use a fully end-to-end Long Form Question Answering system using the [🤗transformers](https://github.com/huggingface/transformers) and [🤗datasets](https://github.com/huggingface/datasets) libraries. 6 | 7 | You can use these methods to train your own system by following along with the associated [notebook](https://github.com/huggingface/notebooks/blob/master/longform-qa/Long_Form_Question_Answering_with_ELI5_and_Wikipedia.ipynb) or [blog post](https://yjernite.github.io/lfqa.html).
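As a rough illustration of the "use" side, here is a minimal sketch (not taken from this project's scripts; the `yjernite/bart_eli5` checkpoint and the `question: ... context: ...` prompt format are assumptions based on the demo) of loading an ELI5-finetuned BART answer generator directly with 🤗transformers:

```python
# Minimal sketch: generate a long-form answer without the retrieval step.
# Assumptions: the "yjernite/bart_eli5" checkpoint and the "question: ... context: ..."
# input format; in the full system the context is filled with retrieved Wikipedia passages.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "yjernite/bart_eli5"  # assumed demo checkpoint; swap in your own fine-tuned model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

question = "Why does the sky look blue during the day?"
prompt = f"question: {question} context: "  # empty context for this sketch
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(**inputs, num_beams=4, min_length=64, max_length=256)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

For a real system, follow the notebook above to build the dense Wikipedia index and fill the `context:` field with retrieved passages before generating; the sketch skips retrieval only to stay self-contained.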
8 | -------------------------------------------------------------------------------- /src/transformers/models/gpt2/CONVERSION.md: -------------------------------------------------------------------------------- 1 | Here is how to convert a GPT2 model generated outside of `transformers` 2 | 3 | * [Megatron-LM](https://github.com/NVIDIA/Megatron-LM)-generated model: 4 | 5 | Use [convert_megatron_gpt2_checkpoint.py](../megatron_gpt2/convert_megatron_gpt2_checkpoint.py) 6 | 7 | * [big-science fork of Megatron-Deepspeed](https://github.com/bigscience-workshop/Megatron-DeepSpeed/)-generated model: 8 | 9 | Use the instructions [here](https://github.com/bigscience-workshop/bigscience/tree/aa872e754106f6678e8a9dac8c6962404ba39a6d/train/tr1-13B-base#checkpoint-conversion-and-upload). This approach uses a set of scripts that require the use of this particular fork of Megatron-Deepspeed. 10 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelname": "{{cookiecutter.modelname}}", 3 | "uppercase_modelname": "{{cookiecutter.uppercase_modelname}}", 4 | "lowercase_modelname": "{{cookiecutter.lowercase_modelname}}", 5 | "camelcase_modelname": "{{cookiecutter.camelcase_modelname}}", 6 | "authors": "{{cookiecutter.authors}}", 7 | "checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}", 8 | "tokenizer_type": "{{cookiecutter.tokenizer_type}}", 9 | "generate_tensorflow_pytorch_and_flax": "{{cookiecutter.generate_tensorflow_pytorch_and_flax}}", 10 | "is_encoder_decoder_model": "{{cookiecutter.is_encoder_decoder_model}}" 11 | } 12 | -------------------------------------------------------------------------------- /src/transformers/models/megatron_gpt2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 NVIDIA Corporation and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /docs/source/de/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Transformers 4 | - local: quicktour 5 | title: Schnellstart 6 | - local: installation 7 | title: Installation 8 | title: Erste Schritte 9 | - sections: 10 | - local: pipeline_tutorial 11 | title: Pipelines für Inferenzen 12 | - local: autoclass_tutorial 13 | title: Laden von vortrainierten Instanzen mit einer AutoClass 14 | - local: preprocessing 15 | title: Vorverarbeiten 16 | - local: training 17 | title: Optimierung eines vortrainierten Modells 18 | - local: accelerate 19 | title: Verteiltes Training mit 🤗 Accelerate 20 | - local: model_sharing 21 | title: Ein Modell teilen 22 | title: Tutorials 23 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="32" \ 6 | --per_device_eval_batch_size="32" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-base" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_mbart_cc25_enro.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | python finetune.py \ 5 | --learning_rate=3e-5 \ 6 | --fp16 \ 7 | --do_train \ 8 | --val_check_interval=0.25 \ 9 | --adam_eps 1e-06 \ 10 | --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --train_batch_size=$BS --eval_batch_size=$BS \ 14 | --task translation \ 15 | --warmup_steps 500 \ 16 | --freeze_embeds \ 17 | --model_name_or_path=facebook/mbart-large-cc25 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /.github/workflows/update_metdata.yml: -------------------------------------------------------------------------------- 1 | name: Update Transformers metadata 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - update_transformers_metadata* 8 | 9 | jobs: 10 | build_and_package: 11 | runs-on: ubuntu-latest 12 | defaults: 13 | run: 14 | shell: bash -l {0} 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Setup environment 20 | run: | 21 | pip install --upgrade pip 22 | pip install datasets pandas==2.0.3 23 | pip install .[torch,tf,flax] 24 | 25 | - name: Update metadata 26 | run: | 27 | python utils/update_metadata.py --token ${{ secrets.LYSANDRE_HF_TOKEN }} --commit_sha ${{ github.sha }} 28 | -------------------------------------------------------------------------------- 
/examples/research_projects/wav2vec2/finetune_large_lv60_100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-100h" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="16" \ 6 | --per_device_eval_batch_size="16" \ 7 | --evaluation_strategy="steps" \ 8 | --save_total_limit="3" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="librispeech_asr" \ 17 | --dataset_config_name="clean" \ 18 | --train_split_name="train.100" \ 19 | --preprocessing_num_workers="32" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor 22 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_base_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-base-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="20" \ 6 | --per_device_eval_batch_size="20" \ 7 | --evaluation_strategy="steps" \ 8 | --save_steps="500" \ 9 | --eval_steps="100" \ 10 | --logging_steps="50" \ 11 | --learning_rate="5e-4" \ 12 | --warmup_steps="3000" \ 13 | --model_name_or_path="facebook/wav2vec2-base" \ 14 | --fp16 \ 15 | --dataset_name="timit_asr" \ 16 | --train_split_name="train" \ 17 | --validation_split_name="test" \ 18 | --orthography="timit" \ 19 | --preprocessing_num_workers="$(nproc)" \ 20 | --group_by_length \ 21 | --freeze_feature_extractor \ 22 | --verbose_logging \ 23 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 
24 | 25 | CMD ["/bin/bash"] 26 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/dynamic_bs_example.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | export m=sshleifer/student_marian_en_ro_6_1 6 | python finetune.py \ 7 | --learning_rate=3e-4 \ 8 | --do_train \ 9 | --fp16 \ 10 | --data_dir wmt_en_ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --freeze_encoder --freeze_embeds \ 13 | --train_batch_size=48 --eval_batch_size=64 \ 14 | --tokenizer_name $m --model_name_or_path $m --num_train_epochs=1 \ 15 | --warmup_steps 500 --logger_name wandb --gpus 1 \ 16 | --fp16_opt_level=O1 --task translation \ 17 | "$@" 18 | -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 25 | 26 | CMD ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /examples/research_projects/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class ClassificationHead(nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (nn.Linear(embed_size, class_size)) 13 | self.mlp = nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = nn.functional.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/sentence_splitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from filelock import FileLock 4 | 5 | 6 | try: 7 | import nltk 8 | 9 | NLTK_AVAILABLE = True 10 | except (ImportError, ModuleNotFoundError): 11 | NLTK_AVAILABLE = False 12 | 13 | if NLTK_AVAILABLE: 14 | with FileLock(".lock") as lock: 15 | nltk.download("punkt", quiet=True) 16 | 17 | 18 | def add_newline_to_end_of_each_sentence(x: str) -> str: 19 | """This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.""" 20 | x = re.sub("<n>", "", x) # strip the pegasus newline token "<n>" before sentence-splitting 21 | assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences.
(pip install nltk)" 22 | return "\n".join(nltk.sent_tokenize(x)) 23 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_no_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | export MAX_LEN=128 5 | python finetune.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --do_predict \ 9 | --fp16 \ 10 | --val_check_interval 0.25 \ 11 | --data_dir $ENRO_DIR \ 12 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 13 | --freeze_encoder --freeze_embeds \ 14 | --train_batch_size=$BS --eval_batch_size=$BS \ 15 | --tokenizer_name $m --model_name_or_path $m \ 16 | --warmup_steps 500 --sortish_sampler --logger_name wandb \ 17 | --gpus 1 --fp16_opt_level=O1 --task translation --num_sanity_val_steps=0 \ 18 | "$@" 19 | -------------------------------------------------------------------------------- /examples/research_projects/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/main/examples/research_projects/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["C901", "E501", "E741"] 8 | select = ["C", "E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | "src/transformers/file_utils.py" = ["F401"] 15 | "src/transformers/utils/dummy_*.py" = ["F401"] 16 | 17 | [tool.ruff.isort] 18 | lines-after-imports = 2 19 | known-first-party = ["transformers"] 20 | 21 | # This is ignored, maybe because of the header? 
If someone finds a fix, we can uncomment and remove setup.cfg 22 | # [tool.pytest] 23 | # doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS" 24 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | export BS=32 5 | export GAS=1 6 | 7 | python finetune.py \ 8 | --learning_rate=3e-5 \ 9 | --fp16 \ 10 | --gpus 1 \ 11 | --do_train \ 12 | --do_predict \ 13 | --val_check_interval 0.25 \ 14 | --n_val 500 \ 15 | --num_train_epochs 2 \ 16 | --freeze_encoder --freeze_embeds --data_dir cnn_dm \ 17 | --max_target_length 142 --val_max_target_length=142 \ 18 | --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ 19 | --model_name_or_path sshleifer/student_cnn_12_6 \ 20 | --tokenizer_name facebook/bart-large \ 21 | --warmup_steps 500 \ 22 | --output_dir distilbart-cnn-12-6 \ 23 | "$@" 24 | 25 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_lv60_timit_asr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-lv60-timit-asr" \ 4 | --num_train_epochs="30" \ 5 | --per_device_train_batch_size="2" \ 6 | --per_device_eval_batch_size="2" \ 7 | --gradient_accumulation_steps="4" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="facebook/wav2vec2-large-lv60" \ 15 | --fp16 \ 16 | --dataset_name="timit_asr" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --orthography="timit" \ 20 | --preprocessing_num_workers="$(nproc)" \ 21 | --group_by_length \ 22 | --freeze_feature_extractor \ 23 | --verbose_logging \ 24 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .trainer_sm import SageMakerTrainer 16 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 17 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_wav2vec2_xlsr_turkish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_common_voice.py \ 3 | --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \ 4 | --dataset_config_name="tr" \ 5 | --output_dir=./wav2vec2-large-xlsr-turkish-demo \ 6 | --overwrite_output_dir \ 7 | --num_train_epochs="5" \ 8 | --per_device_train_batch_size="16" \ 9 | --evaluation_strategy="steps" \ 10 | --learning_rate="3e-4" \ 11 | --warmup_steps="500" \ 12 | --fp16 \ 13 | --freeze_feature_extractor \ 14 | --save_steps="400" \ 15 | --eval_steps="400" \ 16 | --save_total_limit="3" \ 17 | --logging_steps="400" \ 18 | --group_by_length \ 19 | --feat_proj_dropout="0.0" \ 20 | --layerdrop="0.1" \ 21 | --gradient_checkpointing \ 22 | --do_train --do_eval 23 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | python3 run_ner.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --do_train \ 20 | --do_eval 21 | -------------------------------------------------------------------------------- /examples/pytorch/multiple-choice/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | accelerate launch run_swag_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name swag \ 18 | --output_dir /tmp/test-swag-no-trainer \ 19 | --pad_to_max_length 20 | -------------------------------------------------------------------------------- /src/transformers/kernels/deformable_detr/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/distil_marian_enro_teacher.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | export WANDB_PROJECT=dmar 4 | # export MAX_LEN=128 5 | python distillation.py \ 6 | --learning_rate=3e-4 \ 7 | --do_train \ 8 | --fp16 \ 9 | --val_check_interval 0.25 \ 10 | --teacher Helsinki-NLP/opus-mt-en-ro \ 11 | --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ 12 | --student_decoder_layers 3 --student_encoder_layers 6 \ 13 | --freeze_encoder --freeze_embeds \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=$BS --eval_batch_size=$BS \ 17 | --tokenizer_name Helsinki-NLP/opus-mt-en-ro \ 18 | --warmup_steps 500 --logger_name wandb \ 19 | --fp16_opt_level O1 --task translation --normalize_hidden --num_sanity_val_steps=0 \ 20 | "$@" 21 | -------------------------------------------------------------------------------- /src/transformers/kernels/mra/cuda_launch.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define min(a, b) ((a)<(b)?(a):(b)) 6 | #define max(a, b) ((a)>(b)?(a):(b)) 7 | 8 | std::vector index_max_kernel( 9 | at::Tensor index_vals, 10 | at::Tensor indices, 11 | int A_num_block, 12 | int B_num_block 13 | ); 14 | 15 | at::Tensor mm_to_sparse_kernel( 16 | at::Tensor dense_A, 17 | at::Tensor dense_B, 18 | at::Tensor indices 19 | ); 20 | 21 | at::Tensor sparse_dense_mm_kernel( 22 | at::Tensor sparse_A, 23 | at::Tensor indices, 24 | at::Tensor dense_B, 25 | int A_num_block 26 | ); 27 | 28 | at::Tensor reduce_sum_kernel( 29 | at::Tensor sparse_A, 30 | at::Tensor indices, 31 | int A_num_block, 32 | int B_num_block 33 | ); 34 | 35 | at::Tensor scatter_kernel( 36 | at::Tensor dense_A, 37 | at::Tensor indices, 38 | int B_num_block 39 | ); 40 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/train_distilbart_xsum.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | python distillation.py \ 4 | --teacher facebook/bart-large-xsum --data_dir xsum \ 5 | --tokenizer_name facebook/bart-large-xsum \ 6 | --student_decoder_layers 6 --student_encoder_layers 12 \ 7 | --freeze_encoder --freeze_embeds \ 8 | --learning_rate=3e-4 \ 9 | --do_train \ 10 | --do_predict \ 11 | --fp16 
--fp16_opt_level=O1 \ 12 | --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \ 13 | --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \ 14 | --model_name_or_path IGNORED \ 15 | --alpha_hid=3. \ 16 | --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \ 17 | --sortish_sampler \ 18 | --num_train_epochs=6 \ 19 | --warmup_steps 500 \ 20 | --output_dir distilbart_xsum_12_6 \ 21 | "$@" 22 | -------------------------------------------------------------------------------- /utils/test_module/custom_modeling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from transformers import PreTrainedModel 4 | 5 | from .custom_configuration import CustomConfig, NoSuperInitConfig 6 | 7 | 8 | class CustomModel(PreTrainedModel): 9 | config_class = CustomConfig 10 | 11 | def __init__(self, config): 12 | super().__init__(config) 13 | self.linear = torch.nn.Linear(config.hidden_size, config.hidden_size) 14 | 15 | def forward(self, x): 16 | return self.linear(x) 17 | 18 | def _init_weights(self, module): 19 | pass 20 | 21 | 22 | class NoSuperInitModel(PreTrainedModel): 23 | config_class = NoSuperInitConfig 24 | 25 | def __init__(self, config): 26 | super().__init__(config) 27 | self.linear = torch.nn.Linear(config.attribute, config.attribute) 28 | 29 | def forward(self, x): 30 | return self.linear(x) 31 | 32 | def _init_weights(self, module): 33 | pass 34 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/finetune_large_xlsr_53_arabic_speech_corpus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python run_asr.py \ 3 | --output_dir="./wav2vec2-large-xlsr-53-arabic-speech-corpus" \ 4 | --num_train_epochs="50" \ 5 | --per_device_train_batch_size="1" \ 6 | --per_device_eval_batch_size="1" \ 7 | --gradient_accumulation_steps="8" \ 8 | --evaluation_strategy="steps" \ 9 | --save_steps="500" \ 10 | --eval_steps="100" \ 11 | --logging_steps="50" \ 12 | --learning_rate="5e-4" \ 13 | --warmup_steps="3000" \ 14 | --model_name_or_path="elgeish/wav2vec2-large-xlsr-53-arabic" \ 15 | --fp16 \ 16 | --dataset_name="arabic_speech_corpus" \ 17 | --train_split_name="train" \ 18 | --validation_split_name="test" \ 19 | --max_duration_in_seconds="15" \ 20 | --orthography="buckwalter" \ 21 | --preprocessing_num_workers="$(nproc)" \ 22 | --group_by_length \ 23 | --freeze_feature_extractor \ 24 | --target_feature_extractor_sampling_rate \ 25 | --verbose_logging \ 26 | -------------------------------------------------------------------------------- /examples/pytorch/token-classification/run_no_trainer.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | accelerate launch run_ner_no_trainer.py \ 16 | --model_name_or_path bert-base-uncased \ 17 | --dataset_name conll2003 \ 18 | --output_dir /tmp/test-ner \ 19 | --pad_to_max_length \ 20 | --task_name ner \ 21 | --return_entity_level_metrics 22 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/eval_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | 17 | python -u run_glue_deebert.py \ 18 | --model_type $MODEL_TYPE \ 19 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 20 | --task_name $DATASET \ 21 | --do_eval \ 22 | --do_lower_case \ 23 | --data_dir $PATH_TO_DATA/$DATASET \ 24 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 25 | --plot_data_dir ./results/ \ 26 | --max_seq_length 128 \ 27 | --eval_each_highway \ 28 | --eval_highway \ 29 | --overwrite_cache \ 30 | --per_gpu_eval_batch_size=1 31 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/dataset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: huggingface-cluster-disk 5 | spec: 6 | storageClassName: "" 7 | capacity: 8 | storage: 500Gi 9 | accessModes: 10 | - ReadOnlyMany 11 | claimRef: 12 | namespace: default 13 | name: huggingface-cluster-disk-claim 14 | gcePersistentDisk: 15 | pdName: huggingface-cluster-disk 16 | fsType: ext4 17 | readOnly: true 18 | --- 19 | apiVersion: v1 20 | kind: PersistentVolumeClaim 21 | metadata: 22 | name: huggingface-cluster-disk-claim 23 | spec: 24 | # Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass. 25 | # A nil storageClassName value uses the default StorageClass. For details, see 26 | # https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1 27 | storageClassName: "" 28 | accessModes: 29 | - ReadOnlyMany 30 | resources: 31 | requests: 32 | storage: 1Ki 33 | -------------------------------------------------------------------------------- /examples/legacy/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Legacy examples 18 | 19 | This folder contains examples which are not actively maintained (mostly contributed by the community). 20 | 21 | Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working. 22 | -------------------------------------------------------------------------------- /docs/source/en/perf_infer_special.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Inference on Specialized Hardware 17 | 18 | This document will be completed soon with information on how to infer on specialized hardware. In the meantime you can check out [the guide for inference on CPUs](perf_infer_cpu). 
-------------------------------------------------------------------------------- /docs/source/it/perf_infer_special.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Inferenza su Hardware Specializzato 17 | 18 | Questo documento sarà completato a breve con la documentazione per l'inferenza su hardware specializzato. Nel frattempo puoi controllare [la guida per fare inferenza sulle CPU](perf_infer_cpu). -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | RUN git clone https://github.com/NVIDIA/apex 22 | RUN cd apex && \ 23 | python3 setup.py install && \ 24 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 25 | 26 | WORKDIR /workspace 27 | COPY . transformers/ 28 | RUN cd transformers/ && \ 29 | python3 -m pip install --no-cache-dir . 30 | 31 | CMD ["/bin/bash"] 32 | -------------------------------------------------------------------------------- /examples/research_projects/fsner/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | with open("README.md", "r", encoding="utf-8") as fh: 5 | long_description = fh.read() 6 | 7 | setuptools.setup( 8 | name="fsner", 9 | version="0.0.1", 10 | author="msi sayef", 11 | author_email="msi.sayef@gmail.com", 12 | description="Few-shot Named Entity Recognition", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/huggingface/transformers/tree/main/examples/research_projects/fsner", 16 | project_urls={ 17 | "Bug Tracker": "https://github.com/huggingface/transformers/issues", 18 | }, 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "Operating System :: OS Independent", 22 | ], 23 | package_dir={"": "src"}, 24 | packages=setuptools.find_packages(where="src"), 25 | python_requires=">=3.6", 26 | install_requires=["torch>=1.9.0", "transformers>=4.9.2"], 27 | ) 28 | -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
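# Re-exports the self-contained PyTorch Dataset helpers used by the legacy example
# scripts: GLUE sequence classification, (line-by-line) language modeling, and SQuAD
# question answering.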
14 | 15 | from .glue import GlueDataset, GlueDataTrainingArguments 16 | from .language_modeling import ( 17 | LineByLineTextDataset, 18 | LineByLineWithRefDataset, 19 | LineByLineWithSOPTextDataset, 20 | TextDataset, 21 | TextDatasetForNextSentencePrediction, 22 | ) 23 | from .squad import SquadDataset, SquadDataTrainingArguments 24 | -------------------------------------------------------------------------------- /examples/research_projects/seq2seq-distillation/finetune_bart_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://cdn-datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and testing_utils.py 16 | export PYTHONPATH="../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_name_or_path=sshleifer/bart-tiny-random \ 20 | --learning_rate=3e-5 \ 21 | --train_batch_size=2 \ 22 | --eval_batch_size=2 \ 23 | --output_dir=$OUTPUT_DIR \ 24 | --num_train_epochs=1 \ 25 | --gpus=0 \ 26 | --do_train "$@" 27 | 28 | rm -rf cnn_tiny 29 | rm -rf $OUTPUT_DIR 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/research_projects/wav2vec2/vocab/buckwalter.json: -------------------------------------------------------------------------------- 1 | { 2 | "": 0, 3 | "": 1, 4 | "": 2, 5 | "": 3, 6 | "/": 4, 7 | "'": 5, 8 | "|": 6, 9 | ">": 7, 10 | "&": 8, 11 | "<": 9, 12 | "}": 10, 13 | "A": 11, 14 | "b": 12, 15 | "p": 13, 16 | "t": 14, 17 | "v": 15, 18 | "j": 16, 19 | "H": 17, 20 | "x": 18, 21 | "d": 19, 22 | "*": 20, 23 | "r": 21, 24 | "z": 22, 25 | "s": 23, 26 | "$": 24, 27 | "S": 25, 28 | "D": 26, 29 | "T": 27, 30 | "Z": 28, 31 | "E": 29, 32 | "g": 30, 33 | "_": 31, 34 | "f": 32, 35 | "q": 33, 36 | "k": 34, 37 | "l": 35, 38 | "m": 36, 39 | "n": 37, 40 | "h": 38, 41 | "w": 39, 42 | "Y": 40, 43 | "y": 41, 44 | "F": 42, 45 | "N": 43, 46 | "K": 44, 47 | "a": 45, 48 | "u": 46, 49 | "i": 47, 50 | "~": 48, 51 | "o": 49, 52 | "`": 50, 53 | "{": 51, 54 | "P": 52, 55 | "J": 53, 56 | "V": 54, 57 | "G": 55 58 | } -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/initialize_model.py: -------------------------------------------------------------------------------- 1 | from arguments import InitializationArguments 2 | 3 | from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, HfArgumentParser 4 | 5 | 6 | # Configuration 7 | parser = HfArgumentParser(InitializationArguments) 8 | args = parser.parse_args() 9 | 10 | # Load codeparrot tokenizer trained for Python code tokenization 11 | tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name) 12 | 13 | # Config: "scale_attn_by_layer_idx" and "reorder_and_upcast_attn" are Mistral stability tweaks 14 | config_kwargs = { 15 | "vocab_size": len(tokenizer), 16 | "scale_attn_by_inverse_layer_idx": True, 17 | "reorder_and_upcast_attn": True, 18 | } 19 | 20 | # Load model config (GPT-2 large in this case) 21 | config = 
AutoConfig.from_pretrained(args.config_name, **config_kwargs) 22 | 23 | # Initialize new model with config 24 | model = AutoModelForCausalLM.from_config(config) 25 | 26 | # Save model to the hub 27 | model.save_pretrained(args.model_name, push_to_hub=args.push_to_hub) 28 | -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/test_data/fsmt/build-eval-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import json 5 | import subprocess 6 | 7 | 8 | pairs = [ 9 | ["en", "ru"], 10 | ["ru", "en"], 11 | ["en", "de"], 12 | ["de", "en"], 13 | ] 14 | 15 | n_objs = 8 16 | 17 | 18 | def get_all_data(pairs, n_objs): 19 | text = {} 20 | for src, tgt in pairs: 21 | pair = f"{src}-{tgt}" 22 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo src".split() 23 | src_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 24 | cmd = f"sacrebleu -t wmt19 -l {pair} --echo ref".split() 25 | tgt_lines = subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode("utf-8").splitlines() 26 | text[pair] = {"src": src_lines[:n_objs], "tgt": tgt_lines[:n_objs]} 27 | return text 28 | 29 | 30 | text = get_all_data(pairs, n_objs) 31 | filename = "./fsmt_val_data.json" 32 | with io.open(filename, "w", encoding="utf-8") as f: 33 | bleu_data = json.dump(text, f, indent=2, ensure_ascii=False) 34 | -------------------------------------------------------------------------------- /src/transformers/utils/dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
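# Each class below is a placeholder for a Pop2Piano component: instantiating it calls
# requires_backends, which raises an error listing the optional dependencies
# (essentia, librosa, pretty_midi, scipy, torch) needed to use the real implementation.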
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class Pop2PianoFeatureExtractor(metaclass=DummyObject): 6 | _backends = ["essentia", "librosa", "pretty_midi", "scipy", "torch"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["essentia", "librosa", "pretty_midi", "scipy", "torch"]) 10 | 11 | 12 | class Pop2PianoTokenizer(metaclass=DummyObject): 13 | _backends = ["essentia", "librosa", "pretty_midi", "scipy", "torch"] 14 | 15 | def __init__(self, *args, **kwargs): 16 | requires_backends(self, ["essentia", "librosa", "pretty_midi", "scipy", "torch"]) 17 | 18 | 19 | class Pop2PianoProcessor(metaclass=DummyObject): 20 | _backends = ["essentia", "librosa", "pretty_midi", "scipy", "torch"] 21 | 22 | def __init__(self, *args, **kwargs): 23 | requires_backends(self, ["essentia", "librosa", "pretty_midi", "scipy", "torch"]) 24 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_glue.sh: -------------------------------------------------------------------------------- 1 | # Install example requirements 2 | pip install -r ../requirements.txt 3 | 4 | # Download glue data 5 | python3 ../../utils/download_glue_data.py 6 | 7 | export TASK=mrpc 8 | export DATA_DIR=./glue_data/MRPC/ 9 | export MAX_LENGTH=128 10 | export LEARNING_RATE=2e-5 11 | export BERT_MODEL=bert-base-cased 12 | export BATCH_SIZE=32 13 | export NUM_EPOCHS=3 14 | export SEED=2 15 | export OUTPUT_DIR_NAME=mrpc-pl-bert 16 | export CURRENT_DIR=${PWD} 17 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 18 | 19 | # Make output directory if it doesn't exist 20 | mkdir -p $OUTPUT_DIR 21 | # Add parent directory to python path to access lightning_base.py 22 | export PYTHONPATH="../":"${PYTHONPATH}" 23 | 24 | python3 run_glue.py --gpus 1 --data_dir $DATA_DIR \ 25 | --task $TASK \ 26 | --model_name_or_path $BERT_MODEL \ 27 | --output_dir $OUTPUT_DIR \ 28 | --max_seq_length $MAX_LENGTH \ 29 | --learning_rate $LEARNING_RATE \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --train_batch_size $BATCH_SIZE \ 32 | --seed $SEED \ 33 | --do_train \ 34 | --do_predict 35 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/entropy_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | if [ $MODEL_TYPE = 'bert' ] 12 | then 13 | MODEL_NAME=${MODEL_NAME}-uncased 14 | fi 15 | 16 | ENTROPIES="0 0.1 0.2 0.3 0.4 0.5 0.6 0.7" 17 | 18 | for ENTROPY in $ENTROPIES; do 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 22 | --task_name $DATASET \ 23 | --do_eval \ 24 | --do_lower_case \ 25 | --data_dir $PATH_TO_DATA/$DATASET \ 26 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 27 | --plot_data_dir ./results/ \ 28 | --max_seq_length 128 \ 29 | --early_exit_entropy $ENTROPY \ 30 | --eval_highway \ 31 | --overwrite_cache \ 32 | --per_gpu_eval_batch_size=1 33 | done 34 | -------------------------------------------------------------------------------- /src/transformers/models/byt5/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_byt5": ["ByT5Tokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_byt5 import ByT5Tokenizer 25 | else: 26 | import sys 27 | 28 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 29 | -------------------------------------------------------------------------------- /src/transformers/models/deprecated/tapex/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ....utils import _LazyModule 17 | 18 | 19 | _import_structure = {"tokenization_tapex": ["TapexTokenizer"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .tokenization_tapex import TapexTokenizer 24 | 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 30 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 16 | # run ./finetune.sh --help to see all the possible options 17 | python finetune_trainer.py \ 18 | --learning_rate=3e-5 \ 19 | --fp16 \ 20 | --do_train --do_eval --do_predict \ 21 | --evaluation_strategy steps \ 22 | --predict_with_generate \ 23 | --n_val 1000 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /src/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_phobert": ["PhobertTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_phobert import PhobertTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /examples/research_projects/deebert/train_deebert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | PATH_TO_DATA=/h/xinji/projects/GLUE 5 | 6 | MODEL_TYPE=bert # bert or roberta 7 | MODEL_SIZE=base # base or large 8 | DATASET=MRPC # SST-2, MRPC, RTE, QNLI, QQP, or MNLI 9 | 10 | MODEL_NAME=${MODEL_TYPE}-${MODEL_SIZE} 11 | EPOCHS=10 12 | if [ $MODEL_TYPE = 'bert' ] 13 | then 14 | EPOCHS=3 15 | MODEL_NAME=${MODEL_NAME}-uncased 16 | fi 17 | 18 | 19 | python -u run_glue_deebert.py \ 20 | --model_type $MODEL_TYPE \ 21 | --model_name_or_path $MODEL_NAME \ 22 | --task_name $DATASET \ 23 | --do_train \ 24 | --do_eval \ 25 | --do_lower_case \ 26 | --data_dir $PATH_TO_DATA/$DATASET \ 27 | --max_seq_length 128 \ 28 | --per_gpu_eval_batch_size=1 \ 29 | --per_gpu_train_batch_size=8 \ 30 | --learning_rate 2e-5 \ 31 | --num_train_epochs $EPOCHS \ 32 | --overwrite_output_dir \ 33 | --seed 42 \ 34 | --output_dir ./saved_models/${MODEL_TYPE}-${MODEL_SIZE}/$DATASET/two_stage \ 35 | --plot_data_dir ./results/ \ 36 | --save_steps 0 \ 37 | --overwrite_cache \ 38 | --eval_after_first_stage 39 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 16 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 17 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 18 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 19 | -------------------------------------------------------------------------------- /src/transformers/models/bertweet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_bertweet": ["BertweetTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_bertweet import BertweetTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /docker/transformers-doc-builder/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | LABEL maintainer="Hugging Face" 3 | 4 | RUN apt update 5 | RUN git clone https://github.com/huggingface/transformers 6 | 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder ./transformers[dev] 8 | RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr 9 | 10 | # Torch needs to be installed before deepspeed 11 | RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed] 12 | 13 | RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract 14 | RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com 15 | RUN python3 -m pip install -U "itsdangerous<2.1.0" 16 | 17 | # Test if the image could successfully build the doc. 
before publishing the image 18 | RUN doc-builder build transformers transformers/docs/source/en --build_dir doc-build-dev --notebook_dir notebooks/transformers_doc --clean 19 | RUN rm -rf doc-build-dev -------------------------------------------------------------------------------- /src/transformers/models/wav2vec2_with_lm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import _LazyModule 17 | 18 | 19 | _import_structure = {"processing_wav2vec2_with_lm": ["Wav2Vec2ProcessorWithLM"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .processing_wav2vec2_with_lm import Wav2Vec2ProcessorWithLM 24 | else: 25 | import sys 26 | 27 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 28 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-tpu/bert-base-cased.jsonnet: -------------------------------------------------------------------------------- 1 | local base = import 'templates/base.libsonnet'; 2 | local tpus = import 'templates/tpus.libsonnet'; 3 | local utils = import "templates/utils.libsonnet"; 4 | local volumes = import "templates/volumes.libsonnet"; 5 | 6 | local bertBaseCased = base.BaseTest { 7 | frameworkPrefix: "hf", 8 | modelName: "bert-base-cased", 9 | mode: "example", 10 | configMaps: [], 11 | 12 | timeout: 3600, # 1 hour, in seconds 13 | 14 | image: std.extVar('image'), 15 | imageTag: std.extVar('image-tag'), 16 | 17 | tpuSettings+: { 18 | softwareVersion: "pytorch-nightly", 19 | }, 20 | accelerator: tpus.v3_8, 21 | 22 | volumeMap+: { 23 | datasets: volumes.PersistentVolumeSpec { 24 | name: "huggingface-cluster-disk", 25 | mountPath: "/datasets", 26 | }, 27 | }, 28 | command: utils.scriptCommand( 29 | ||| 30 | python -m pytest -s transformers/examples/pytorch/test_xla_examples.py -v 31 | test_exit_code=$? 32 | echo "\nFinished running commands.\n" 33 | test $test_exit_code -eq 0 34 | ||| 35 | ), 36 | }; 37 | 38 | bertBaseCased.oneshotJob 39 | -------------------------------------------------------------------------------- /src/transformers/models/wav2vec2_phoneme/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import _LazyModule 17 | 18 | 19 | _import_structure = {"tokenization_wav2vec2_phoneme": ["Wav2Vec2PhonemeCTCTokenizer"]} 20 | 21 | 22 | if TYPE_CHECKING: 23 | from .tokenization_wav2vec2_phoneme import Wav2Vec2PhonemeCTCTokenizer 24 | else: 25 | import sys 26 | 27 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 28 | -------------------------------------------------------------------------------- /docs/source/en/main_classes/keras_callbacks.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Keras callbacks 18 | 19 | When training a Transformers model with Keras, there are some library-specific callbacks available to automate common 20 | tasks: 21 | 22 | ## KerasMetricCallback 23 | 24 | [[autodoc]] KerasMetricCallback 25 | 26 | ## PushToHubCallback 27 | 28 | [[autodoc]] PushToHubCallback 29 | -------------------------------------------------------------------------------- /examples/research_projects/vqgan-clip/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import matplotlib.pyplot as plt 4 | import torch 5 | 6 | 7 | def freeze_module(module): 8 | for param in module.parameters(): 9 | param.requires_grad = False 10 | 11 | 12 | def get_device(): 13 | device = "cuda" if torch.cuda.is_available() else "cpu" 14 | if torch.backends.mps.is_available() and torch.backends.mps.is_built(): 15 | device = "mps" 16 | if device == "mps": 17 | print( 18 | "WARNING: MPS currently doesn't seem to work, and messes up backpropagation without any visible torch" 19 | " errors. I recommend using CUDA on a colab notebook or CPU instead if you're facing inexplicable issues" 20 | " with generations." 21 | ) 22 | return device 23 | 24 | 25 | def show_pil(img): 26 | fig = plt.imshow(img) 27 | fig.axes.get_xaxis().set_visible(False) 28 | fig.axes.get_yaxis().set_visible(False) 29 | plt.show() 30 | 31 | 32 | def get_timestamp(): 33 | current_time = datetime.now() 34 | timestamp = current_time.strftime("%H:%M:%S") 35 | return timestamp 36 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/finetune_tpu.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
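# Runs the seq2seq fine-tuning example on TPU: xla_spawn.py forks one process per TPU
# core (8 here), and each process executes finetune_trainer.py with the extra arguments
# forwarded through "$@".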
14 | 15 | export TPU_NUM_CORES=8 16 | 17 | # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path 18 | # run ./finetune_tpu.sh --help to see all the possible options 19 | python xla_spawn.py --num_cores $TPU_NUM_CORES \ 20 | finetune_trainer.py \ 21 | --learning_rate=3e-5 \ 22 | --do_train --do_eval \ 23 | --evaluation_strategy steps \ 24 | --prediction_loss_only \ 25 | --n_val 1000 \ 26 | "$@" 27 | -------------------------------------------------------------------------------- /scripts/fsmt/tests-to-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # these scripts need to be run before any changes to FSMT-related code - it should cover all bases 17 | 18 | CUDA_VISIBLE_DEVICES="" RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 19 | RUN_SLOW=1 pytest --disable-warnings tests/test_tokenization_fsmt.py tests/test_configuration_auto.py tests/test_modeling_fsmt.py examples/seq2seq/test_fsmt_bleu_score.py 20 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 
2.200 11 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/tests/test_deduplicate.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from datasets import Dataset 4 | from minhash_deduplication import deduplicate_dataset, make_duplicate_clusters 5 | 6 | 7 | def get_dataset(): 8 | data_dict = { 9 | "repo_name": ["test_repo1", "test_repo2", "test_repo3"], 10 | "path": ["test_1.py", "test_2.py", "unit_test.py"], 11 | "content": ["a " * 20, "a " * 30, "b " * 7], 12 | } 13 | dataset = Dataset.from_dict(data_dict) 14 | return dataset 15 | 16 | 17 | class MakeDuplicateClustersTest(TestCase): 18 | def test_make_duplicate_clusters(self): 19 | ds = get_dataset() 20 | duplicate_clusters = make_duplicate_clusters(ds, 0.85) 21 | self.assertEqual(len(duplicate_clusters[0]), 2) 22 | 23 | def test_deduplicate_dataset(self): 24 | ds = get_dataset() 25 | ds_filter, duplicate_clusters = deduplicate_dataset(ds) 26 | self.assertEqual(len(ds_filter), 2) 27 | print(duplicate_clusters) 28 | self.assertEqual(duplicate_clusters[0][0]["copies"], 2) 29 | self.assertEqual(duplicate_clusters[0][0]["is_extreme"], True) 30 | -------------------------------------------------------------------------------- /src/transformers/utils/bitsandbytes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import warnings 15 | 16 | 17 | warnings.warn( 18 | "transformers.utils.bitsandbytes module is deprecated and will be removed in a future version. Please import bitsandbytes modules directly from transformers.integrations", 19 | FutureWarning, 20 | ) 21 | 22 | from ..integrations import ( # noqa 23 | get_keys_to_not_convert, 24 | replace_8bit_linear, 25 | replace_with_bnb_linear, 26 | set_module_8bit_tensor_to_device, 27 | set_module_quantized_tensor_to_device, 28 | ) 29 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 
2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750 11 | -------------------------------------------------------------------------------- /.github/conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "transformers" %} 2 | 3 | package: 4 | name: "{{ name|lower }}" 5 | version: "{{ TRANSFORMERS_VERSION }}" 6 | 7 | source: 8 | path: ../../ 9 | 10 | build: 11 | noarch: python 12 | 13 | requirements: 14 | host: 15 | - python 16 | - pip 17 | - numpy >=1.17 18 | - dataclasses 19 | - huggingface_hub 20 | - packaging 21 | - filelock 22 | - requests 23 | - tqdm >=4.27 24 | - sacremoses 25 | - regex !=2019.12.17 26 | - protobuf 27 | - tokenizers >=0.11.1,!=0.11.3,<0.13 28 | - pyyaml >=5.1 29 | run: 30 | - python 31 | - numpy >=1.17 32 | - dataclasses 33 | - huggingface_hub 34 | - packaging 35 | - filelock 36 | - requests 37 | - tqdm >=4.27 38 | - sacremoses 39 | - regex !=2019.12.17 40 | - protobuf 41 | - tokenizers >=0.11.1,!=0.11.3,<0.13 42 | - pyyaml >=5.1 43 | 44 | test: 45 | imports: 46 | - transformers 47 | 48 | about: 49 | home: https://huggingface.co 50 | license: Apache License 2.0 51 | license_file: LICENSE 52 | summary: "🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0." 53 | -------------------------------------------------------------------------------- /examples/research_projects/codeparrot/scripts/bpe_training.py: -------------------------------------------------------------------------------- 1 | from arguments import TokenizerTrainingArguments 2 | from datasets import load_dataset 3 | from tqdm import tqdm 4 | 5 | from transformers import AutoTokenizer, HfArgumentParser 6 | from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode 7 | 8 | 9 | # Iterator for Training 10 | def batch_iterator(batch_size=10): 11 | for _ in tqdm(range(0, args.n_examples, batch_size)): 12 | yield [next(iter_dataset)[args.text_column] for _ in range(batch_size)] 13 | 14 | 15 | # Configuration 16 | parser = HfArgumentParser(TokenizerTrainingArguments) 17 | args = parser.parse_args() 18 | 19 | # Base tokenizer 20 | tokenizer = AutoTokenizer.from_pretrained(args.base_tokenizer) 21 | base_vocab = list(bytes_to_unicode().values()) 22 | 23 | # Load dataset 24 | dataset = load_dataset(args.dataset_name, split="train", streaming=True) 25 | iter_dataset = iter(dataset) 26 | 27 | 28 | # Training and saving 29 | new_tokenizer = tokenizer.train_new_from_iterator( 30 | batch_iterator(), vocab_size=args.vocab_size, initial_alphabet=base_vocab 31 | ) 32 | new_tokenizer.save_pretrained(args.tokenizer_name, push_to_hub=args.push_to_hub) 33 | -------------------------------------------------------------------------------- /src/transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import warnings 15 | 16 | from ..trainer import Trainer 17 | from ..utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class SageMakerTrainer(Trainer): 24 | def __init__(self, args=None, **kwargs): 25 | warnings.warn( 26 | "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. You can use `Trainer` " 27 | "instead.", 28 | FutureWarning, 29 | ) 30 | super().__init__(args=args, **kwargs) 31 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 2 | LABEL maintainer="Hugging Face" 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt update 7 | RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg 8 | RUN python3 -m pip install --no-cache-dir --upgrade pip 9 | 10 | ARG REF=main 11 | RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF 12 | RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-tensorflow,testing] 13 | 14 | # If set to nothing, will install the latest version 15 | ARG TENSORFLOW='2.13' 16 | 17 | RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' || VERSION='tensorflow'; python3 -m pip install --no-cache-dir -U $VERSION 18 | RUN python3 -m pip uninstall -y torch flax 19 | RUN python3 -m pip install -U "itsdangerous<2.1.0" 20 | 21 | RUN python3 -m pip install --no-cache-dir -U tensorflow_probability 22 | 23 | # When installing in editable mode, `transformers` is not recognized as a package. 24 | # this line must be added in order for python to be aware of transformers. 25 | RUN cd transformers && python3 setup.py develop 26 | -------------------------------------------------------------------------------- /src/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
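# Lazily exposes the Japanese BERT tokenization utilities: BertJapaneseTokenizer plus
# the MecabTokenizer (morphological word splitting) and CharacterTokenizer it can
# delegate to.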
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ...utils import _LazyModule 18 | 19 | 20 | _import_structure = {"tokenization_bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"]} 21 | 22 | 23 | if TYPE_CHECKING: 24 | from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer 25 | 26 | else: 27 | import sys 28 | 29 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 30 | -------------------------------------------------------------------------------- /examples/research_projects/rag/finetune_rag.sh: -------------------------------------------------------------------------------- 1 | # Add parent directory to python path to access lightning_base.py 2 | export PYTHONPATH="../":"${PYTHONPATH}" 3 | 4 | # A sample finetuning run, you need to specify data_dir, output_dir and model_name_or_path 5 | # run ./examples/rag/finetune_rag.sh --help to see all the possible options 6 | 7 | python examples/rag/finetune_rag.py \ 8 | --data_dir $DATA_DIR \ 9 | --output_dir $OUTPUT_DIR \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --model_type rag_sequence \ 12 | --fp16 \ 13 | --gpus 8 \ 14 | --profile \ 15 | --do_train \ 16 | --do_predict \ 17 | --n_val -1 \ 18 | --train_batch_size 8 \ 19 | --eval_batch_size 1 \ 20 | --max_source_length 128 \ 21 | --max_target_length 25 \ 22 | --val_max_target_length 25 \ 23 | --test_max_target_length 25 \ 24 | --label_smoothing 0.1 \ 25 | --dropout 0.1 \ 26 | --attention_dropout 0.1 \ 27 | --weight_decay 0.001 \ 28 | --adam_epsilon 1e-08 \ 29 | --max_grad_norm 0.1 \ 30 | --lr_scheduler polynomial \ 31 | --learning_rate 3e-05 \ 32 | --num_train_epochs 100 \ 33 | --warmup_steps 500 \ 34 | --gradient_accumulation_steps 1 \ 35 | -------------------------------------------------------------------------------- /docs/source/en/perf_train_tpu.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Training on TPUs 17 | 18 | 19 | 20 | Note: Most of the strategies introduced in the [single GPU section](perf_train_gpu_one) (such as mixed precision training or gradient accumulation) and [multi-GPU section](perf_train_gpu_many) are generic and apply to training models in general so make sure to have a look at it before diving into this section. 21 | 22 | 23 | 24 | This document will be completed soon with information on how to train on TPUs. 25 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_pos.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Download dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Download test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Download train dataset...." 
13 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=postagger-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type POS \ 26 | --data_dir . \ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from transformers import AutoTokenizer 4 | 5 | 6 | dataset = sys.argv[1] 7 | model_name_or_path = sys.argv[2] 8 | max_len = int(sys.argv[3]) 9 | 10 | subword_len_counter = 0 11 | 12 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 13 | max_len -= tokenizer.num_special_tokens_to_add() 14 | 15 | with open(dataset, "rt") as f_p: 16 | for line in f_p: 17 | line = line.rstrip() 18 | 19 | if not line: 20 | print(line) 21 | subword_len_counter = 0 22 | continue 23 | 24 | token = line.split()[0] 25 | 26 | current_subwords_len = len(tokenizer.tokenize(token)) 27 | 28 | # Token contains strange control characters like \x96 or \x95 29 | # Just filter out the complete line 30 | if current_subwords_len == 0: 31 | continue 32 | 33 | if (subword_len_counter + current_subwords_len) > max_len: 34 | print("") 35 | print(line) 36 | subword_len_counter = current_subwords_len 37 | continue 38 | 39 | subword_len_counter += current_subwords_len 40 | 41 | print(line) 42 | -------------------------------------------------------------------------------- /examples/legacy/token-classification/run_chunk.sh: -------------------------------------------------------------------------------- 1 | if ! [ -f ./dev.txt ]; then 2 | echo "Downloading CONLL2003 dev dataset...." 3 | curl -L -o ./dev.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/valid.txt' 4 | fi 5 | 6 | if ! [ -f ./test.txt ]; then 7 | echo "Downloading CONLL2003 test dataset...." 8 | curl -L -o ./test.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/test.txt' 9 | fi 10 | 11 | if ! [ -f ./train.txt ]; then 12 | echo "Downloading CONLL2003 train dataset...." 13 | curl -L -o ./train.txt 'https://github.com/davidsbatista/NER-datasets/raw/master/CONLL2003/train.txt' 14 | fi 15 | 16 | export MAX_LENGTH=200 17 | export BERT_MODEL=bert-base-uncased 18 | export OUTPUT_DIR=chunker-model 19 | export BATCH_SIZE=32 20 | export NUM_EPOCHS=3 21 | export SAVE_STEPS=750 22 | export SEED=1 23 | 24 | python3 run_ner.py \ 25 | --task_type Chunk \ 26 | --data_dir . 
\ 27 | --model_name_or_path $BERT_MODEL \ 28 | --output_dir $OUTPUT_DIR \ 29 | --max_seq_length $MAX_LENGTH \ 30 | --num_train_epochs $NUM_EPOCHS \ 31 | --per_gpu_train_batch_size $BATCH_SIZE \ 32 | --save_steps $SAVE_STEPS \ 33 | --seed $SEED \ 34 | --do_train \ 35 | --do_eval \ 36 | --do_predict 37 | 38 | -------------------------------------------------------------------------------- /model_cards/README.md: -------------------------------------------------------------------------------- 1 | ## 🔥 Model cards now live inside each huggingface.co model repo 🔥 2 | 3 | 4 | For consistency, ease of use and scalability, `README.md` model cards now live directly inside each model repo on the HuggingFace model hub. 5 | 6 | ### How to update a model card 7 | 8 | You can directly update a model card inside any model repo you have **write access** to, i.e.: 9 | - a model under your username namespace 10 | - a model under any organization you are a part of. 11 | 12 | You can either: 13 | - update it, commit and push using your usual git workflow (command line, GUI, etc.) 14 | - or edit it directly from the website's UI. 15 | 16 | **What if you want to create or update a model card for a model you don't have write access to?** 17 | 18 | In that case, you can open a [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions)! Check out the [announcement](https://huggingface.co/blog/community-update) of this feature for more details 🤗. 19 | 20 | ### What happened to the model cards here? 21 | 22 | We migrated every model card from the repo to its corresponding huggingface.co model repo. Individual commits were preserved, and they link back to the original commit on GitHub. 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model addition" 2 | description: Submit a proposal/request to implement a new model 3 | labels: [ "New model" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model description 12 | description: | 13 | Put any and all important information relative to the model 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `transformers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available" 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 32 | -------------------------------------------------------------------------------- /src/transformers/generation_tf_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import TFGenerationMixin 20 | 21 | 22 | class TFGenerationMixin(TFGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature request" 2 | description: Submit a proposal/request for a new transformers feature 3 | labels: [ "feature" ] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 22 | 23 | 24 | - type: textarea 25 | id: contribution 26 | validations: 27 | required: true 28 | attributes: 29 | label: Your contribution 30 | description: | 31 | Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md) 32 | -------------------------------------------------------------------------------- /docs/source/en/perf_train_special.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Training on Specialized Hardware 17 | 18 | 19 | 20 | Note: Most of the strategies introduced in the [single GPU section](perf_train_gpu_one) (such as mixed precision training or gradient accumulation) and [multi-GPU section](perf_train_gpu_many) are generic and apply to training models in general so make sure to have a look at it before diving into this section. 21 | 22 | 23 | 24 | This document will be completed soon with information on how to train on specialized hardware. 25 | -------------------------------------------------------------------------------- /src/transformers/generation_flax_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Google AI Flax Team Authors, and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import FlaxGenerationMixin 20 | 21 | 22 | class FlaxGenerationMixin(FlaxGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `FlaxGenerationMixin` from `src/transformers/generation_flax_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. Import as `from transformers import FlaxGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /src/transformers/generation_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Language Team Authors, Facebook AI Research authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import GenerationMixin 20 | 21 | 22 | class GenerationMixin(GenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Research projects 18 | 19 | This folder contains various research projects using 🤗 Transformers. They are not maintained and require a specific 20 | version of 🤗 Transformers that is indicated in the requirements file of each folder. Updating them to the most recent version of the library will require some work. 21 | 22 | To use any of them, just run the command 23 | ``` 24 | pip install -r requirements.txt 25 | ``` 26 | inside the folder of your choice. 27 | 28 | If you need help with any of those, contact the author(s), indicated at the top of the `README` of each folder. 
29 | -------------------------------------------------------------------------------- /docs/source/it/perf_train_tpu.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Addestramento su TPU 17 | 18 | 19 | 20 | Nota: Molte delle strategie introdotte nella [sezione sulla GPU singola](perf_train_gpu_one) (come mixed precision training o gradient accumulation) e [sezione multi-GPU](perf_train_gpu_many) sono generiche e applicabili all'addestramento di modelli in generale quindi assicurati di dargli un'occhiata prima di immergerti in questa sezione. 21 | 22 | 23 | 24 | Questo documento sarà presto completato con informazioni su come effettuare la formazione su TPU. 25 | -------------------------------------------------------------------------------- /utils/test_module/custom_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from transformers import Pipeline 4 | 5 | 6 | def softmax(outputs): 7 | maxes = np.max(outputs, axis=-1, keepdims=True) 8 | shifted_exp = np.exp(outputs - maxes) 9 | return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) 10 | 11 | 12 | class PairClassificationPipeline(Pipeline): 13 | def _sanitize_parameters(self, **kwargs): 14 | preprocess_kwargs = {} 15 | if "second_text" in kwargs: 16 | preprocess_kwargs["second_text"] = kwargs["second_text"] 17 | return preprocess_kwargs, {}, {} 18 | 19 | def preprocess(self, text, second_text=None): 20 | return self.tokenizer(text, text_pair=second_text, return_tensors=self.framework) 21 | 22 | def _forward(self, model_inputs): 23 | return self.model(**model_inputs) 24 | 25 | def postprocess(self, model_outputs): 26 | logits = model_outputs.logits[0].numpy() 27 | probabilities = softmax(logits) 28 | 29 | best_class = np.argmax(probabilities) 30 | label = self.model.config.id2label[best_class] 31 | score = probabilities[best_class].item() 32 | logits = logits.tolist() 33 | return {"label": label, "score": score, "logits": logits} 34 | -------------------------------------------------------------------------------- /docs/source/ko/perf_infer_gpu_many.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # 다중 GPU에서 효율적인 추론 [[efficient-inference-on-a-multiple-gpus]] 17 | 18 | 이 문서에는 다중 GPU에서 효율적으로 추론하는 방법에 대한 정보가 포함되어 있습니다. 19 | 20 | 21 | 참고: 다중 GPU 설정은 [단일 GPU 섹션](./perf_infer_gpu_one)에서 설명된 대부분의 전략을 사용할 수 있습니다. 그러나 더 나은 활용을 위해 간단한 기법들을 알아야 합니다. 22 | 23 | 24 | 25 | ## 더 빠른 추론을 위한 `BetterTransformer` [[bettertransformer-for-faster-inference]] 26 | 27 | 우리는 최근 텍스트, 이미지 및 오디오 모델에 대한 다중 GPU에서 더 빠른 추론을 위해 `BetterTransformer`를 통합했습니다. 자세한 내용은 이 통합에 대한 [문서](https://huggingface.co/docs/optimum/bettertransformer/overview)를 확인하십시오. 
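A minimal sketch of enabling `BetterTransformer` through 🤗 Optimum for multi-GPU inference — the checkpoint name is only an illustrative placeholder, and `device_map="auto"` assumes `accelerate` is installed so the model can be sharded across the available GPUs:

```python
# Minimal sketch: BetterTransformer for multi-GPU inference via 🤗 Optimum.
# Assumes `optimum` and `accelerate` are installed; the checkpoint below is a placeholder.
from optimum.bettertransformer import BetterTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "facebook/opt-1.3b"  # illustrative model id
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")  # shard across GPUs

model = BetterTransformer.transform(model)  # swap supported layers for fused attention kernels

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```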
-------------------------------------------------------------------------------- /.github/workflows/release-conda.yml: -------------------------------------------------------------------------------- 1 | name: Release - Conda 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | branches: 8 | - conda_* 9 | 10 | env: 11 | ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} 12 | 13 | jobs: 14 | build_and_package: 15 | runs-on: ubuntu-latest 16 | defaults: 17 | run: 18 | shell: bash -l {0} 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v1 23 | 24 | - name: Install miniconda 25 | uses: conda-incubator/setup-miniconda@v2 26 | with: 27 | auto-update-conda: true 28 | auto-activate-base: false 29 | python-version: 3.8 30 | activate-environment: "build-transformers" 31 | channels: huggingface 32 | 33 | - name: Setup conda env 34 | run: | 35 | conda install -c defaults anaconda-client conda-build 36 | 37 | - name: Extract version 38 | run: echo "TRANSFORMERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV 39 | 40 | - name: Build conda packages 41 | run: | 42 | conda info 43 | conda list 44 | conda-build .github/conda 45 | 46 | - name: Upload to Anaconda 47 | run: anaconda upload `conda-build .github/conda --output` --force 48 | -------------------------------------------------------------------------------- /docs/source/it/perf_train_special.md: -------------------------------------------------------------------------------- 1 | 15 | 16 | # Addestramento su Hardware Specializzato 17 | 18 | 19 | 20 | Nota: Molte delle strategie introdotte nella [sezione sulla GPU singola](perf_train_gpu_one) (come mixed precision training o gradient accumulation) e [sezione multi-GPU](perf_train_gpu_many) sono generiche e applicabili all'addestramento di modelli in generale quindi assicurati di dargli un'occhiata prima di immergerti in questa sezione. 21 | 22 | 23 | 24 | Questo documento sarà presto completato con informazioni su come effettuare la formazione su hardware specializzato. 25 | -------------------------------------------------------------------------------- /examples/legacy/pytorch-lightning/run_pos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if ! [ -f ./dev.txt ]; then 3 | echo "Download dev dataset...." 4 | curl -L -o ./dev.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-dev.conllu' 5 | fi 6 | 7 | if ! [ -f ./test.txt ]; then 8 | echo "Download test dataset...." 9 | curl -L -o ./test.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-test.conllu' 10 | fi 11 | 12 | if ! [ -f ./train.txt ]; then 13 | echo "Download train dataset...." 
14 | curl -L -o ./train.txt 'https://github.com/UniversalDependencies/UD_English-EWT/raw/master/en_ewt-ud-train.conllu' 15 | fi 16 | 17 | export MAX_LENGTH=200 18 | export BERT_MODEL=bert-base-uncased 19 | export OUTPUT_DIR=postagger-model 20 | export BATCH_SIZE=32 21 | export NUM_EPOCHS=3 22 | export SAVE_STEPS=750 23 | export SEED=1 24 | 25 | 26 | # Add parent directory to python path to access lightning_base.py 27 | export PYTHONPATH="../":"${PYTHONPATH}" 28 | 29 | python3 run_ner.py --data_dir ./ \ 30 | --task_type POS \ 31 | --model_name_or_path $BERT_MODEL \ 32 | --output_dir $OUTPUT_DIR \ 33 | --max_seq_length $MAX_LENGTH \ 34 | --num_train_epochs $NUM_EPOCHS \ 35 | --train_batch_size $BATCH_SIZE \ 36 | --seed $SEED \ 37 | --gpus 1 \ 38 | --do_train \ 39 | --do_predict 40 | -------------------------------------------------------------------------------- /src/transformers/kernels/deformable_detr/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor ms_deform_attn_cuda_forward( 15 | const at::Tensor &value, 16 | const at::Tensor &spatial_shapes, 17 | const at::Tensor &level_start_index, 18 | const at::Tensor &sampling_loc, 19 | const at::Tensor &attn_weight, 20 | const int im2col_step); 21 | 22 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const at::Tensor &grad_output, 29 | const int im2col_step); 30 | -------------------------------------------------------------------------------- /src/transformers/kernels/deformable_detr/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor 15 | ms_deform_attn_cpu_forward( 16 | const at::Tensor &value, 17 | const at::Tensor &spatial_shapes, 18 | const at::Tensor &level_start_index, 19 | const at::Tensor &sampling_loc, 20 | const at::Tensor &attn_weight, 21 | const int im2col_step); 22 | 23 | std::vector<at::Tensor> 24 | ms_deform_attn_cpu_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/minify_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from pathlib import Path 17 | 18 | import fire 19 | 20 | 21 | def minify(src_dir: str, dest_dir: str, n: int): 22 | """Write first n lines of each file f in src_dir to dest_dir/f""" 23 | src_dir = Path(src_dir) 24 | dest_dir = Path(dest_dir) 25 | dest_dir.mkdir(exist_ok=True) 26 | for path in src_dir.iterdir(): 27 | new = [x.rstrip() for x in list(path.open().readlines())][:n] 28 | dest_path = dest_dir.joinpath(path.name) 29 | print(dest_path) 30 | dest_path.open("w").write("\n".join(new)) 31 | 32 | 33 | if __name__ == "__main__": 34 | fire.Fire(minify) 35 | -------------------------------------------------------------------------------- /examples/legacy/seq2seq/rouge_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import fire 16 | 17 | from utils import calculate_rouge, save_json 18 | 19 | 20 | def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs): 21 | """Kwargs will be passed to calculate_rouge""" 22 | pred_lns = [x.strip() for x in open(pred_path).readlines()] 23 | tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)] 24 | metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs) 25 | if save_path is not None: 26 | save_json(metrics, save_path, indent=None) 27 | return metrics # these print nicely 28 | 29 | 30 | if __name__ == "__main__": 31 | fire.Fire(calculate_rouge_path) 32 | --------------------------------------------------------------------------------
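For reference, a brief usage sketch for `rouge_cli.py` above — the file names are hypothetical placeholders, and it assumes the companion `utils.py` from the same `examples/legacy/seq2seq` folder is importable:

```python
# Sketch only: `fire` exposes calculate_rouge_path on the command line, e.g.
#   python rouge_cli.py predictions.txt targets.txt --save_path rouge_metrics.json
# It can also be called programmatically; the file names below are placeholders.
from rouge_cli import calculate_rouge_path

metrics = calculate_rouge_path("predictions.txt", "targets.txt", save_path="rouge_metrics.json")
print(metrics)  # dict of ROUGE scores returned by utils.calculate_rouge
```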