├── LICENSE ├── README.md ├── compute_interpretation_scores.py ├── data ├── aggrefact-deduplicated-final-test │ └── data-dev.jsonl ├── aggrefact-deduplicated-final │ ├── data-dev.jsonl │ └── data-train.jsonl └── fever2 │ └── data-dev.jsonl ├── evaluate_multi_label_classification_scores.py ├── modeling ├── __init__.py ├── model.py ├── run_new.py ├── scripts │ ├── aggrefact-finetune-finegrainfact-model.sh │ ├── aggrefact-inference-finegrainfact-model.sh │ └── fever2-inference-finegrainfact-model.sh ├── transformers │ ├── __init__.py │ ├── activations.py │ ├── activations_tf.py │ ├── benchmark │ │ ├── __init__.py │ │ ├── benchmark.py │ │ ├── benchmark_args.py │ │ ├── benchmark_args_tf.py │ │ ├── benchmark_args_utils.py │ │ ├── benchmark_tf.py │ │ └── benchmark_utils.py │ ├── commands │ │ ├── __init__.py │ │ ├── add_new_model.py │ │ ├── add_new_model_like.py │ │ ├── convert.py │ │ ├── download.py │ │ ├── env.py │ │ ├── lfs.py │ │ ├── run.py │ │ ├── serving.py │ │ ├── train.py │ │ ├── transformers_cli.py │ │ └── user.py │ ├── configuration_utils.py │ ├── convert_graph_to_onnx.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_slow_tokenizer.py │ ├── convert_slow_tokenizers_checkpoints_to_fast.py │ ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py │ ├── data │ │ ├── __init__.py │ │ ├── data_collator.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── glue.py │ │ │ ├── language_modeling.py │ │ │ └── squad.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ └── squad_metrics.py │ │ ├── processors │ │ │ ├── __init__.py │ │ │ ├── glue.py │ │ │ ├── squad.py │ │ │ ├── utils.py │ │ │ └── xnli.py │ │ └── test_generation_utils.py │ ├── debug_utils.py │ ├── deepspeed.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── dynamic_module_utils.py │ ├── feature_extraction_sequence_utils.py │ ├── feature_extraction_utils.py │ ├── file_utils.py │ ├── generation_beam_constraints.py │ ├── generation_beam_search.py │ ├── generation_flax_logits_process.py │ ├── 
generation_flax_utils.py │ ├── generation_logits_process.py │ ├── generation_stopping_criteria.py │ ├── generation_tf_logits_process.py │ ├── generation_tf_utils.py │ ├── generation_utils.py │ ├── hf_argparser.py │ ├── image_utils.py │ ├── integrations.py │ ├── keras_callbacks.py │ ├── modelcard.py │ ├── modeling_flax_outputs.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_outputs.py │ ├── modeling_tf_outputs.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_utils.py │ ├── modeling_utils.py │ ├── models │ │ ├── __init__.py │ │ ├── albert │ │ │ ├── __init__.py │ │ │ ├── configuration_albert.py │ │ │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_albert.py │ │ │ ├── modeling_flax_albert.py │ │ │ ├── modeling_tf_albert.py │ │ │ ├── tokenization_albert.py │ │ │ └── tokenization_albert_fast.py │ │ ├── auto │ │ │ ├── __init__.py │ │ │ ├── auto_factory.py │ │ │ ├── configuration_auto.py │ │ │ ├── dynamic.py │ │ │ ├── feature_extraction_auto.py │ │ │ ├── modeling_auto.py │ │ │ ├── modeling_flax_auto.py │ │ │ ├── modeling_tf_auto.py │ │ │ ├── processing_auto.py │ │ │ └── tokenization_auto.py │ │ ├── bart │ │ │ ├── __init__.py │ │ │ ├── configuration_bart.py │ │ │ ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_bart.py │ │ │ ├── modeling_flax_bart.py │ │ │ ├── modeling_tf_bart.py │ │ │ ├── tokenization_bart.py │ │ │ └── tokenization_bart_fast.py │ │ ├── barthez │ │ │ ├── __init__.py │ │ │ ├── tokenization_barthez.py │ │ │ └── tokenization_barthez_fast.py │ │ ├── bartpho │ │ │ ├── __init__.py │ │ │ └── tokenization_bartpho.py │ │ ├── beit │ │ │ ├── __init__.py │ │ │ ├── configuration_beit.py │ │ │ ├── convert_beit_unilm_to_pytorch.py │ │ │ ├── feature_extraction_beit.py │ │ │ ├── modeling_beit.py │ │ │ └── modeling_flax_beit.py │ │ ├── bert │ │ │ ├── __init__.py │ │ │ ├── configuration_bert.py │ │ │ ├── convert_bert_original_tf2_checkpoint_to_pytorch.py │ │ │ ├── 
convert_bert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ │ │ ├── modeling_bert.py │ │ │ ├── modeling_flax_bert.py │ │ │ ├── modeling_tf_bert.py │ │ │ ├── tokenization_bert.py │ │ │ └── tokenization_bert_fast.py │ │ ├── bert_adapter │ │ │ ├── __init__.py │ │ │ └── modeling_bert.py │ │ ├── bert_generation │ │ │ ├── __init__.py │ │ │ ├── configuration_bert_generation.py │ │ │ ├── modeling_bert_generation.py │ │ │ └── tokenization_bert_generation.py │ │ ├── bert_japanese │ │ │ ├── __init__.py │ │ │ └── tokenization_bert_japanese.py │ │ ├── bertweet │ │ │ ├── __init__.py │ │ │ └── tokenization_bertweet.py │ │ ├── big_bird │ │ │ ├── __init__.py │ │ │ ├── configuration_big_bird.py │ │ │ ├── convert_bigbird_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_big_bird.py │ │ │ ├── modeling_flax_big_bird.py │ │ │ ├── tokenization_big_bird.py │ │ │ └── tokenization_big_bird_fast.py │ │ ├── bigbird_pegasus │ │ │ ├── __init__.py │ │ │ ├── configuration_bigbird_pegasus.py │ │ │ ├── convert_bigbird_pegasus_tf_to_pytorch.py │ │ │ └── modeling_bigbird_pegasus.py │ │ ├── blenderbot │ │ │ ├── __init__.py │ │ │ ├── configuration_blenderbot.py │ │ │ ├── convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_blenderbot.py │ │ │ ├── modeling_flax_blenderbot.py │ │ │ ├── modeling_tf_blenderbot.py │ │ │ ├── tokenization_blenderbot.py │ │ │ └── tokenization_blenderbot_fast.py │ │ ├── blenderbot_small │ │ │ ├── __init__.py │ │ │ ├── configuration_blenderbot_small.py │ │ │ ├── modeling_blenderbot_small.py │ │ │ ├── modeling_flax_blenderbot_small.py │ │ │ ├── modeling_tf_blenderbot_small.py │ │ │ ├── tokenization_blenderbot_small.py │ │ │ └── tokenization_blenderbot_small_fast.py │ │ ├── bort │ │ │ ├── __init__.py │ │ │ └── convert_bort_original_gluonnlp_checkpoint_to_pytorch.py │ │ ├── byt5 │ │ │ ├── __init__.py │ │ │ ├── convert_byt5_original_tf_checkpoint_to_pytorch.py │ │ │ └── tokenization_byt5.py │ │ ├── 
camembert │ │ │ ├── __init__.py │ │ │ ├── configuration_camembert.py │ │ │ ├── modeling_camembert.py │ │ │ ├── modeling_tf_camembert.py │ │ │ ├── tokenization_camembert.py │ │ │ └── tokenization_camembert_fast.py │ │ ├── canine │ │ │ ├── __init__.py │ │ │ ├── configuration_canine.py │ │ │ ├── convert_canine_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_canine.py │ │ │ └── tokenization_canine.py │ │ ├── clip │ │ │ ├── __init__.py │ │ │ ├── configuration_clip.py │ │ │ ├── convert_clip_original_pytorch_to_hf.py │ │ │ ├── feature_extraction_clip.py │ │ │ ├── modeling_clip.py │ │ │ ├── modeling_flax_clip.py │ │ │ ├── modeling_tf_clip.py │ │ │ ├── processing_clip.py │ │ │ ├── tokenization_clip.py │ │ │ └── tokenization_clip_fast.py │ │ ├── convbert │ │ │ ├── __init__.py │ │ │ ├── configuration_convbert.py │ │ │ ├── convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py │ │ │ ├── modeling_convbert.py │ │ │ ├── modeling_tf_convbert.py │ │ │ ├── tokenization_convbert.py │ │ │ └── tokenization_convbert_fast.py │ │ ├── convnext │ │ │ ├── __init__.py │ │ │ ├── configuration_convnext.py │ │ │ ├── convert_convnext_to_pytorch.py │ │ │ ├── feature_extraction_convnext.py │ │ │ ├── modeling_convnext.py │ │ │ └── modeling_tf_convnext.py │ │ ├── cpm │ │ │ ├── __init__.py │ │ │ ├── tokenization_cpm.py │ │ │ └── tokenization_cpm_fast.py │ │ ├── ctrl │ │ │ ├── __init__.py │ │ │ ├── configuration_ctrl.py │ │ │ ├── modeling_ctrl.py │ │ │ ├── modeling_tf_ctrl.py │ │ │ └── tokenization_ctrl.py │ │ ├── data2vec │ │ │ ├── __init__.py │ │ │ ├── configuration_data2vec_audio.py │ │ │ ├── configuration_data2vec_text.py │ │ │ ├── convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_data2vec_audio.py │ │ │ └── modeling_data2vec_text.py │ │ ├── deberta │ │ │ ├── __init__.py │ │ │ ├── configuration_deberta.py │ │ │ ├── modeling_deberta.py │ │ │ ├── modeling_tf_deberta.py │ │ │ ├── 
tokenization_deberta.py │ │ │ └── tokenization_deberta_fast.py │ │ ├── deberta_v2 │ │ │ ├── __init__.py │ │ │ ├── configuration_deberta_v2.py │ │ │ ├── modeling_deberta_v2.py │ │ │ ├── modeling_tf_deberta_v2.py │ │ │ └── tokenization_deberta_v2.py │ │ ├── decision_transformer │ │ │ ├── __init__.py │ │ │ ├── configuration_decision_transformer.py │ │ │ └── modeling_decision_transformer.py │ │ ├── deit │ │ │ ├── __init__.py │ │ │ ├── configuration_deit.py │ │ │ ├── convert_deit_timm_to_pytorch.py │ │ │ ├── feature_extraction_deit.py │ │ │ └── modeling_deit.py │ │ ├── detr │ │ │ ├── __init__.py │ │ │ ├── configuration_detr.py │ │ │ ├── convert_detr_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_detr.py │ │ │ └── modeling_detr.py │ │ ├── dialogpt │ │ │ ├── __init__.py │ │ │ └── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ │ ├── distilbert │ │ │ ├── __init__.py │ │ │ ├── configuration_distilbert.py │ │ │ ├── modeling_distilbert.py │ │ │ ├── modeling_flax_distilbert.py │ │ │ ├── modeling_tf_distilbert.py │ │ │ ├── tokenization_distilbert.py │ │ │ └── tokenization_distilbert_fast.py │ │ ├── dit │ │ │ ├── __init__.py │ │ │ └── convert_dit_unilm_to_pytorch.py │ │ ├── dpr │ │ │ ├── __init__.py │ │ │ ├── configuration_dpr.py │ │ │ ├── convert_dpr_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_dpr.py │ │ │ ├── modeling_tf_dpr.py │ │ │ ├── tokenization_dpr.py │ │ │ └── tokenization_dpr_fast.py │ │ ├── dpt │ │ │ ├── __init__.py │ │ │ ├── configuration_dpt.py │ │ │ ├── convert_dpt_to_pytorch.py │ │ │ ├── feature_extraction_dpt.py │ │ │ └── modeling_dpt.py │ │ ├── electra │ │ │ ├── __init__.py │ │ │ ├── configuration_electra.py │ │ │ ├── convert_electra_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_electra.py │ │ │ ├── modeling_flax_electra.py │ │ │ ├── modeling_tf_electra.py │ │ │ ├── tokenization_electra.py │ │ │ └── tokenization_electra_fast.py │ │ ├── encoder_decoder │ │ │ ├── __init__.py │ │ │ ├── 
configuration_encoder_decoder.py │ │ │ ├── modeling_encoder_decoder.py │ │ │ ├── modeling_flax_encoder_decoder.py │ │ │ └── modeling_tf_encoder_decoder.py │ │ ├── flaubert │ │ │ ├── __init__.py │ │ │ ├── configuration_flaubert.py │ │ │ ├── modeling_flaubert.py │ │ │ ├── modeling_tf_flaubert.py │ │ │ └── tokenization_flaubert.py │ │ ├── fnet │ │ │ ├── __init__.py │ │ │ ├── configuration_fnet.py │ │ │ ├── convert_fnet_original_flax_checkpoint_to_pytorch.py │ │ │ ├── modeling_fnet.py │ │ │ ├── tokenization_fnet.py │ │ │ └── tokenization_fnet_fast.py │ │ ├── fsmt │ │ │ ├── __init__.py │ │ │ ├── configuration_fsmt.py │ │ │ ├── convert_fsmt_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_fsmt.py │ │ │ └── tokenization_fsmt.py │ │ ├── funnel │ │ │ ├── __init__.py │ │ │ ├── configuration_funnel.py │ │ │ ├── convert_funnel_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_funnel.py │ │ │ ├── modeling_tf_funnel.py │ │ │ ├── tokenization_funnel.py │ │ │ └── tokenization_funnel_fast.py │ │ ├── glpn │ │ │ ├── __init__.py │ │ │ ├── configuration_glpn.py │ │ │ ├── convert_glpn_to_pytorch.py │ │ │ ├── feature_extraction_glpn.py │ │ │ └── modeling_glpn.py │ │ ├── gpt2 │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt2.py │ │ │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_flax_gpt2.py │ │ │ ├── modeling_gpt2.py │ │ │ ├── modeling_tf_gpt2.py │ │ │ ├── tokenization_gpt2.py │ │ │ └── tokenization_gpt2_fast.py │ │ ├── gpt_neo │ │ │ ├── __init__.py │ │ │ ├── configuration_gpt_neo.py │ │ │ ├── convert_gpt_neo_mesh_tf_to_pytorch.py │ │ │ ├── modeling_flax_gpt_neo.py │ │ │ └── modeling_gpt_neo.py │ │ ├── gptj │ │ │ ├── __init__.py │ │ │ ├── configuration_gptj.py │ │ │ ├── modeling_flax_gptj.py │ │ │ ├── modeling_gptj.py │ │ │ └── modeling_tf_gptj.py │ │ ├── herbert │ │ │ ├── __init__.py │ │ │ ├── tokenization_herbert.py │ │ │ └── tokenization_herbert_fast.py │ │ ├── hubert │ │ │ ├── __init__.py │ │ │ ├── configuration_hubert.py │ │ │ ├── 
convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── convert_hubert_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_hubert_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── modeling_hubert.py │ │ │ └── modeling_tf_hubert.py │ │ ├── ibert │ │ │ ├── __init__.py │ │ │ ├── configuration_ibert.py │ │ │ ├── modeling_ibert.py │ │ │ └── quant_modules.py │ │ ├── imagegpt │ │ │ ├── __init__.py │ │ │ ├── configuration_imagegpt.py │ │ │ ├── convert_imagegpt_original_tf2_to_pytorch.py │ │ │ ├── feature_extraction_imagegpt.py │ │ │ └── modeling_imagegpt.py │ │ ├── layoutlm │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutlm.py │ │ │ ├── modeling_layoutlm.py │ │ │ ├── modeling_tf_layoutlm.py │ │ │ ├── tokenization_layoutlm.py │ │ │ └── tokenization_layoutlm_fast.py │ │ ├── layoutlmv2 │ │ │ ├── __init__.py │ │ │ ├── configuration_layoutlmv2.py │ │ │ ├── feature_extraction_layoutlmv2.py │ │ │ ├── modeling_layoutlmv2.py │ │ │ ├── processing_layoutlmv2.py │ │ │ ├── tokenization_layoutlmv2.py │ │ │ └── tokenization_layoutlmv2_fast.py │ │ ├── layoutxlm │ │ │ ├── __init__.py │ │ │ ├── processing_layoutxlm.py │ │ │ ├── tokenization_layoutxlm.py │ │ │ └── tokenization_layoutxlm_fast.py │ │ ├── led │ │ │ ├── __init__.py │ │ │ ├── configuration_led.py │ │ │ ├── modeling_led.py │ │ │ ├── modeling_tf_led.py │ │ │ ├── tokenization_led.py │ │ │ └── tokenization_led_fast.py │ │ ├── longformer │ │ │ ├── __init__.py │ │ │ ├── configuration_longformer.py │ │ │ ├── convert_longformer_original_pytorch_lightning_to_pytorch.py │ │ │ ├── modeling_longformer.py │ │ │ ├── modeling_tf_longformer.py │ │ │ ├── tokenization_longformer.py │ │ │ └── tokenization_longformer_fast.py │ │ ├── luke │ │ │ ├── __init__.py │ │ │ ├── configuration_luke.py │ │ │ ├── convert_luke_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_luke.py │ │ │ └── tokenization_luke.py │ │ ├── lxmert │ │ │ ├── __init__.py │ │ │ ├── configuration_lxmert.py │ │ │ ├── 
convert_lxmert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_lxmert.py │ │ │ ├── modeling_tf_lxmert.py │ │ │ ├── tokenization_lxmert.py │ │ │ └── tokenization_lxmert_fast.py │ │ ├── m2m_100 │ │ │ ├── __init__.py │ │ │ ├── configuration_m2m_100.py │ │ │ ├── convert_m2m100_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_m2m_100.py │ │ │ └── tokenization_m2m_100.py │ │ ├── marian │ │ │ ├── __init__.py │ │ │ ├── configuration_marian.py │ │ │ ├── convert_marian_tatoeba_to_pytorch.py │ │ │ ├── convert_marian_to_pytorch.py │ │ │ ├── modeling_flax_marian.py │ │ │ ├── modeling_marian.py │ │ │ ├── modeling_tf_marian.py │ │ │ └── tokenization_marian.py │ │ ├── maskformer │ │ │ ├── __init__.py │ │ │ ├── configuration_maskformer.py │ │ │ ├── convert_maskformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_maskformer.py │ │ │ └── modeling_maskformer.py │ │ ├── mbart │ │ │ ├── __init__.py │ │ │ ├── configuration_mbart.py │ │ │ ├── convert_mbart_original_checkpoint_to_pytorch.py │ │ │ ├── modeling_flax_mbart.py │ │ │ ├── modeling_mbart.py │ │ │ ├── modeling_tf_mbart.py │ │ │ ├── tokenization_mbart.py │ │ │ └── tokenization_mbart_fast.py │ │ ├── mbart50 │ │ │ ├── __init__.py │ │ │ ├── tokenization_mbart50.py │ │ │ └── tokenization_mbart50_fast.py │ │ ├── megatron_bert │ │ │ ├── __init__.py │ │ │ ├── configuration_megatron_bert.py │ │ │ ├── convert_megatron_bert_checkpoint.py │ │ │ └── modeling_megatron_bert.py │ │ ├── megatron_gpt2 │ │ │ ├── __init__.py │ │ │ └── convert_megatron_gpt2_checkpoint.py │ │ ├── mluke │ │ │ ├── __init__.py │ │ │ ├── convert_mluke_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── tokenization_mluke.py │ │ ├── mmbt │ │ │ ├── __init__.py │ │ │ ├── configuration_mmbt.py │ │ │ └── modeling_mmbt.py │ │ ├── mobilebert │ │ │ ├── __init__.py │ │ │ ├── configuration_mobilebert.py │ │ │ ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_mobilebert.py │ │ │ ├── modeling_tf_mobilebert.py │ │ │ ├── 
tokenization_mobilebert.py │ │ │ └── tokenization_mobilebert_fast.py │ │ ├── mpnet │ │ │ ├── __init__.py │ │ │ ├── configuration_mpnet.py │ │ │ ├── modeling_mpnet.py │ │ │ ├── modeling_tf_mpnet.py │ │ │ ├── tokenization_mpnet.py │ │ │ └── tokenization_mpnet_fast.py │ │ ├── mt5 │ │ │ ├── __init__.py │ │ │ ├── configuration_mt5.py │ │ │ ├── modeling_flax_mt5.py │ │ │ ├── modeling_mt5.py │ │ │ └── modeling_tf_mt5.py │ │ ├── nystromformer │ │ │ ├── __init__.py │ │ │ ├── configuration_nystromformer.py │ │ │ ├── convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_nystromformer.py │ │ ├── openai │ │ │ ├── __init__.py │ │ │ ├── configuration_openai.py │ │ │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_openai.py │ │ │ ├── modeling_tf_openai.py │ │ │ ├── tokenization_openai.py │ │ │ └── tokenization_openai_fast.py │ │ ├── pegasus │ │ │ ├── __init__.py │ │ │ ├── configuration_pegasus.py │ │ │ ├── convert_pegasus_tf_to_pytorch.py │ │ │ ├── modeling_flax_pegasus.py │ │ │ ├── modeling_pegasus.py │ │ │ ├── modeling_tf_pegasus.py │ │ │ ├── tokenization_pegasus.py │ │ │ └── tokenization_pegasus_fast.py │ │ ├── perceiver │ │ │ ├── __init__.py │ │ │ ├── configuration_perceiver.py │ │ │ ├── convert_perceiver_haiku_to_pytorch.py │ │ │ ├── feature_extraction_perceiver.py │ │ │ ├── modeling_perceiver.py │ │ │ └── tokenization_perceiver.py │ │ ├── phobert │ │ │ ├── __init__.py │ │ │ └── tokenization_phobert.py │ │ ├── plbart │ │ │ ├── __init__.py │ │ │ ├── configuration_plbart.py │ │ │ ├── convert_plbart_original_checkpoint_to_torch.py │ │ │ ├── modeling_plbart.py │ │ │ └── tokenization_plbart.py │ │ ├── poolformer │ │ │ ├── __init__.py │ │ │ ├── configuration_poolformer.py │ │ │ ├── convert_poolformer_original_to_pytorch.py │ │ │ ├── feature_extraction_poolformer.py │ │ │ └── modeling_poolformer.py │ │ ├── prophetnet │ │ │ ├── __init__.py │ │ │ ├── configuration_prophetnet.py │ │ │ ├── 
convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_prophetnet.py │ │ │ └── tokenization_prophetnet.py │ │ ├── qdqbert │ │ │ ├── __init__.py │ │ │ ├── configuration_qdqbert.py │ │ │ └── modeling_qdqbert.py │ │ ├── rag │ │ │ ├── __init__.py │ │ │ ├── configuration_rag.py │ │ │ ├── modeling_rag.py │ │ │ ├── modeling_tf_rag.py │ │ │ ├── retrieval_rag.py │ │ │ └── tokenization_rag.py │ │ ├── realm │ │ │ ├── __init__.py │ │ │ ├── configuration_realm.py │ │ │ ├── modeling_realm.py │ │ │ ├── retrieval_realm.py │ │ │ ├── tokenization_realm.py │ │ │ └── tokenization_realm_fast.py │ │ ├── reformer │ │ │ ├── __init__.py │ │ │ ├── configuration_reformer.py │ │ │ ├── convert_reformer_trax_checkpoint_to_pytorch.py │ │ │ ├── modeling_reformer.py │ │ │ ├── tokenization_reformer.py │ │ │ └── tokenization_reformer_fast.py │ │ ├── rembert │ │ │ ├── __init__.py │ │ │ ├── configuration_rembert.py │ │ │ ├── convert_rembert_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_rembert.py │ │ │ ├── modeling_tf_rembert.py │ │ │ ├── tokenization_rembert.py │ │ │ └── tokenization_rembert_fast.py │ │ ├── resnet │ │ │ ├── __init__.py │ │ │ ├── configuration_resnet.py │ │ │ ├── convert_resnet_to_pytorch.py │ │ │ └── modeling_resnet.py │ │ ├── retribert │ │ │ ├── __init__.py │ │ │ ├── configuration_retribert.py │ │ │ ├── modeling_retribert.py │ │ │ ├── tokenization_retribert.py │ │ │ └── tokenization_retribert_fast.py │ │ ├── roberta │ │ │ ├── __init__.py │ │ │ ├── configuration_roberta.py │ │ │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_flax_roberta.py │ │ │ ├── modeling_roberta.py │ │ │ ├── modeling_tf_roberta.py │ │ │ ├── tokenization_roberta.py │ │ │ └── tokenization_roberta_fast.py │ │ ├── roformer │ │ │ ├── __init__.py │ │ │ ├── configuration_roformer.py │ │ │ ├── convert_roformer_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_flax_roformer.py │ │ │ ├── modeling_roformer.py │ │ │ ├── modeling_tf_roformer.py │ │ │ ├── 
tokenization_roformer.py │ │ │ ├── tokenization_roformer_fast.py │ │ │ └── tokenization_utils.py │ │ ├── segformer │ │ │ ├── __init__.py │ │ │ ├── configuration_segformer.py │ │ │ ├── convert_segformer_original_to_pytorch.py │ │ │ ├── feature_extraction_segformer.py │ │ │ └── modeling_segformer.py │ │ ├── sew │ │ │ ├── __init__.py │ │ │ ├── configuration_sew.py │ │ │ ├── convert_sew_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_sew.py │ │ ├── sew_d │ │ │ ├── __init__.py │ │ │ ├── configuration_sew_d.py │ │ │ ├── convert_sew_d_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_sew_d.py │ │ ├── speech_encoder_decoder │ │ │ ├── __init__.py │ │ │ ├── configuration_speech_encoder_decoder.py │ │ │ ├── convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py │ │ │ ├── convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py │ │ │ ├── modeling_flax_speech_encoder_decoder.py │ │ │ └── modeling_speech_encoder_decoder.py │ │ ├── speech_to_text │ │ │ ├── __init__.py │ │ │ ├── configuration_speech_to_text.py │ │ │ ├── convert_s2t_fairseq_to_tfms.py │ │ │ ├── feature_extraction_speech_to_text.py │ │ │ ├── modeling_speech_to_text.py │ │ │ ├── modeling_tf_speech_to_text.py │ │ │ ├── processing_speech_to_text.py │ │ │ └── tokenization_speech_to_text.py │ │ ├── speech_to_text_2 │ │ │ ├── __init__.py │ │ │ ├── configuration_speech_to_text_2.py │ │ │ ├── modeling_speech_to_text_2.py │ │ │ ├── processing_speech_to_text_2.py │ │ │ └── tokenization_speech_to_text_2.py │ │ ├── splinter │ │ │ ├── __init__.py │ │ │ ├── configuration_splinter.py │ │ │ ├── modeling_splinter.py │ │ │ ├── tokenization_splinter.py │ │ │ └── tokenization_splinter_fast.py │ │ ├── squeezebert │ │ │ ├── __init__.py │ │ │ ├── configuration_squeezebert.py │ │ │ ├── modeling_squeezebert.py │ │ │ ├── tokenization_squeezebert.py │ │ │ └── tokenization_squeezebert_fast.py │ │ ├── swin │ │ │ ├── __init__.py │ │ │ ├── configuration_swin.py │ │ │ ├── convert_swin_timm_to_pytorch.py │ │ │ └── 
modeling_swin.py │ │ ├── t5 │ │ │ ├── __init__.py │ │ │ ├── configuration_t5.py │ │ │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_flax_t5.py │ │ │ ├── modeling_t5.py │ │ │ ├── modeling_tf_t5.py │ │ │ ├── tokenization_t5.py │ │ │ └── tokenization_t5_fast.py │ │ ├── tapas │ │ │ ├── __init__.py │ │ │ ├── configuration_tapas.py │ │ │ ├── convert_tapas_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_tapas.py │ │ │ ├── modeling_tf_tapas.py │ │ │ └── tokenization_tapas.py │ │ ├── transfo_xl │ │ │ ├── __init__.py │ │ │ ├── configuration_transfo_xl.py │ │ │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_tf_transfo_xl.py │ │ │ ├── modeling_tf_transfo_xl_utilities.py │ │ │ ├── modeling_transfo_xl.py │ │ │ ├── modeling_transfo_xl_utilities.py │ │ │ └── tokenization_transfo_xl.py │ │ ├── trocr │ │ │ ├── __init__.py │ │ │ ├── configuration_trocr.py │ │ │ ├── modeling_trocr.py │ │ │ └── processing_trocr.py │ │ ├── unispeech │ │ │ ├── __init__.py │ │ │ ├── configuration_unispeech.py │ │ │ ├── convert_unispeech_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_unispeech.py │ │ ├── unispeech_sat │ │ │ ├── __init__.py │ │ │ ├── configuration_unispeech_sat.py │ │ │ ├── convert_unispeech_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_unispeech_sat.py │ │ ├── van │ │ │ ├── __init__.py │ │ │ ├── configuration_van.py │ │ │ ├── convert_van_to_pytorch.py │ │ │ └── modeling_van.py │ │ ├── vilt │ │ │ ├── __init__.py │ │ │ ├── configuration_vilt.py │ │ │ ├── convert_vilt_original_to_pytorch.py │ │ │ ├── feature_extraction_vilt.py │ │ │ ├── modeling_vilt.py │ │ │ └── processing_vilt.py │ │ ├── vision_encoder_decoder │ │ │ ├── __init__.py │ │ │ ├── configuration_vision_encoder_decoder.py │ │ │ ├── convert_trocr_unilm_to_pytorch.py │ │ │ ├── modeling_flax_vision_encoder_decoder.py │ │ │ ├── modeling_tf_vision_encoder_decoder.py │ │ │ 
└── modeling_vision_encoder_decoder.py │ │ ├── vision_text_dual_encoder │ │ │ ├── __init__.py │ │ │ ├── configuration_vision_text_dual_encoder.py │ │ │ ├── modeling_flax_vision_text_dual_encoder.py │ │ │ ├── modeling_vision_text_dual_encoder.py │ │ │ └── processing_vision_text_dual_encoder.py │ │ ├── visual_bert │ │ │ ├── __init__.py │ │ │ ├── configuration_visual_bert.py │ │ │ ├── convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_visual_bert.py │ │ ├── vit │ │ │ ├── __init__.py │ │ │ ├── configuration_vit.py │ │ │ ├── convert_dino_to_pytorch.py │ │ │ ├── convert_vit_timm_to_pytorch.py │ │ │ ├── feature_extraction_vit.py │ │ │ ├── modeling_flax_vit.py │ │ │ ├── modeling_tf_vit.py │ │ │ └── modeling_vit.py │ │ ├── vit_mae │ │ │ ├── __init__.py │ │ │ ├── configuration_vit_mae.py │ │ │ ├── convert_vit_mae_to_pytorch.py │ │ │ ├── modeling_tf_vit_mae.py │ │ │ └── modeling_vit_mae.py │ │ ├── wav2vec2 │ │ │ ├── __init__.py │ │ │ ├── configuration_wav2vec2.py │ │ │ ├── convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py │ │ │ ├── feature_extraction_wav2vec2.py │ │ │ ├── modeling_flax_wav2vec2.py │ │ │ ├── modeling_tf_wav2vec2.py │ │ │ ├── modeling_wav2vec2.py │ │ │ ├── processing_wav2vec2.py │ │ │ └── tokenization_wav2vec2.py │ │ ├── wav2vec2_phoneme │ │ │ ├── __init__.py │ │ │ └── tokenization_wav2vec2_phoneme.py │ │ ├── wav2vec2_with_lm │ │ │ ├── __init__.py │ │ │ └── processing_wav2vec2_with_lm.py │ │ ├── wavlm │ │ │ ├── __init__.py │ │ │ ├── configuration_wavlm.py │ │ │ ├── convert_wavlm_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── convert_wavlm_original_s3prl_checkpoint_to_pytorch.py │ │ │ └── modeling_wavlm.py │ │ ├── xglm │ │ │ ├── __init__.py │ │ │ ├── configuration_xglm.py │ │ │ ├── convert_xglm_original_ckpt_to_trfms.py │ │ │ ├── modeling_flax_xglm.py │ │ │ ├── modeling_xglm.py │ │ │ ├── tokenization_xglm.py │ │ │ └── tokenization_xglm_fast.py │ │ ├── 
xlm │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm.py │ │ │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ │ │ ├── modeling_tf_xlm.py │ │ │ ├── modeling_xlm.py │ │ │ └── tokenization_xlm.py │ │ ├── xlm_prophetnet │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm_prophetnet.py │ │ │ ├── modeling_xlm_prophetnet.py │ │ │ └── tokenization_xlm_prophetnet.py │ │ ├── xlm_roberta │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm_roberta.py │ │ │ ├── modeling_flax_xlm_roberta.py │ │ │ ├── modeling_tf_xlm_roberta.py │ │ │ ├── modeling_xlm_roberta.py │ │ │ ├── tokenization_xlm_roberta.py │ │ │ └── tokenization_xlm_roberta_fast.py │ │ ├── xlm_roberta_xl │ │ │ ├── __init__.py │ │ │ ├── configuration_xlm_roberta_xl.py │ │ │ ├── convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py │ │ │ └── modeling_xlm_roberta_xl.py │ │ ├── xlnet │ │ │ ├── __init__.py │ │ │ ├── configuration_xlnet.py │ │ │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ │ │ ├── modeling_tf_xlnet.py │ │ │ ├── modeling_xlnet.py │ │ │ ├── tokenization_xlnet.py │ │ │ └── tokenization_xlnet_fast.py │ │ └── yoso │ │ │ ├── __init__.py │ │ │ ├── configuration_yoso.py │ │ │ ├── convert_yoso_pytorch_to_pytorch.py │ │ │ └── modeling_yoso.py │ ├── onnx │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── config.py │ │ ├── convert.py │ │ ├── features.py │ │ └── utils.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── audio_classification.py │ │ ├── audio_utils.py │ │ ├── automatic_speech_recognition.py │ │ ├── base.py │ │ ├── conversational.py │ │ ├── feature_extraction.py │ │ ├── fill_mask.py │ │ ├── image_classification.py │ │ ├── image_segmentation.py │ │ ├── object_detection.py │ │ ├── pt_utils.py │ │ ├── question_answering.py │ │ ├── table_question_answering.py │ │ ├── text2text_generation.py │ │ ├── text_classification.py │ │ ├── text_generation.py │ │ ├── token_classification.py │ │ ├── zero_shot_classification.py │ │ └── 
zero_shot_image_classification.py │ ├── processing_utils.py │ ├── py.typed │ ├── pytorch_utils.py │ ├── sagemaker │ │ ├── __init__.py │ │ ├── trainer_sm.py │ │ └── training_args_sm.py │ ├── testing_utils.py │ ├── tf_utils.py │ ├── tokenization_utils.py │ ├── tokenization_utils_base.py │ ├── tokenization_utils_fast.py │ ├── trainer.py │ ├── trainer_callback.py │ ├── trainer_pt_utils.py │ ├── trainer_seq2seq.py │ ├── trainer_tf.py │ ├── trainer_utils.py │ ├── training_args.py │ ├── training_args_seq2seq.py │ ├── training_args_tf.py │ └── utils │ │ ├── __init__.py │ │ ├── doc.py │ │ ├── dummy_detectron2_objects.py │ │ ├── dummy_flax_objects.py │ │ ├── dummy_pt_objects.py │ │ ├── dummy_pytorch_quantization_and_torch_objects.py │ │ ├── dummy_scatter_objects.py │ │ ├── dummy_sentencepiece_and_speech_objects.py │ │ ├── dummy_sentencepiece_and_tokenizers_objects.py │ │ ├── dummy_sentencepiece_objects.py │ │ ├── dummy_speech_objects.py │ │ ├── dummy_tf_objects.py │ │ ├── dummy_timm_and_vision_objects.py │ │ ├── dummy_timm_objects.py │ │ ├── dummy_tokenizers_objects.py │ │ ├── dummy_vision_objects.py │ │ ├── fx.py │ │ ├── fx_transformations.py │ │ ├── generic.py │ │ ├── hp_naming.py │ │ ├── hub.py │ │ ├── import_utils.py │ │ ├── logging.py │ │ ├── model_parallel_utils.py │ │ ├── notebook.py │ │ ├── sentencepiece_model_pb2.py │ │ └── versions.py └── utils.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ken Chan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 
| 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Interpretable Automatic Fine-grained Inconsistency Detection in Text Summarization 2 | 3 | This repository contains the source code for our ACL Findings 2023 paper: [Interpretable Automatic Fine-grained Inconsistency Detection in Text Summarization](https://arxiv.org/pdf/2305.14548). 4 | 5 | If you use our source code, please cite our paper 6 | ``` 7 | @inproceedings{finegrainfact, 8 | title={Interpretable Automatic Fine-grained Inconsistency Detection in Text Summarization}, 9 | author={Chan, Hou Pong and Zeng, Qi and Ji, Heng}, 10 | booktitle = "Findings of the Association for Computational Linguistics: ACL 2023", 11 | month = {July}, 12 | year = "2023", 13 | publisher = "Association for Computational Linguistics", 14 | } 15 | ``` 16 | 17 | ## Aggrefact-United Dataset 18 | 19 | We conduct experiments on the Aggrefact-United dataset. If you use this dataset, please cite their [paper](https://arxiv.org/pdf/2205.12854v1.pdf). 20 | 21 | The original dataset contains 5,496 samples. We remove the duplicated annotations and obtain 4,489 samples. 22 | Then we randomly split data samples into train/validation/test sets of size 3,689/300/500. 
23 | After that, we use the SRL tool from AllenNLP to parse the document and summary. 24 | This repository contains our preprocessed data splits. 25 | The training and validation sets are in `data/aggrefact-deduplicated-final`. The test set is in `data/aggrefact-deduplicated-final-test`. 26 | 27 | ## Environment setup 28 | ``` 29 | conda create -n finegrainfact python=3.7.13 30 | conda activate finegrainfact && pip3 install -r requirements.txt 31 | ``` 32 | 33 | ## Training 34 | ``` 35 | # please change the CODE_PATH, DATA_PATH, OUTPUT_PATH variables in the below script file before running it. 36 | bash modeling/scripts/aggrefact-finetune-finegrainfact-model.sh 2>&1 | tee ./logs/aggrefact-finetune-finegrainfact-model.log 37 | ``` 38 | After the training process is completed, you can find the path to the best checkpoint by searching `Best bacc chkpt path:` in the log file `./logs/aggrefact-finetune-finegrainfact-model.log`. 39 | 40 | ## Inference 41 | Run the following script. 42 | ``` 43 | # please change the CODE_PATH, DATA_PATH, CKPT_PATH variables in the below script file before running it. 44 | bash modeling/scripts/aggrefact-inference-finegrainfact-model.sh 45 | ``` 46 | 47 | ## Evaluation of Document Fact Highlights 48 | Our preprocessed Fever 2.0 dataset is in `./data/fever2`. 49 | Run the following script. 50 | ``` 51 | # please change the CODE_PATH, DATA_PATH, CKPT_PATH variables in the below script file before running it. 
52 | bash modeling/scripts/fever2-inference-finegrainfact-model.sh 53 | ``` 54 | -------------------------------------------------------------------------------- /modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenchan0226/FineGrainedFact/3d96823657fd23ff1091fc6e972c66a40222ec97/modeling/__init__.py -------------------------------------------------------------------------------- /modeling/scripts/aggrefact-finetune-finegrainfact-model.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # Fine-tune FactCC model 3 | 4 | # UPDATE PATHS BEFORE RUNNING SCRIPT 5 | export CUDA_VISIBLE_DEVICES=0 6 | export CODE_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/modeling # absolute path to modeling directory 7 | export DATA_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/data/aggrefact-deduplicated-final # absolute path to data directory 8 | export OUTPUT_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/model_ckpts/aggrefact-multi-label-claim-attn-mean-word-layer-attn-finetune-adapter-four-label-dedup # absolute path to store model checkpoint 9 | 10 | export TASK_NAME=aggrefact_multi_label_claim_attn_annotated_with_entire_sent_four_label 11 | export MODEL_NAME=bert-base-uncased 12 | export N_CLAIM_ATTN_HEADS=16 13 | 14 | export SEED=$RANDOM 15 | python3 $CODE_PATH/run_new.py \ 16 | --task_name $TASK_NAME \ 17 | --do_train \ 18 | --do_eval \ 19 | --evaluate_during_training \ 20 | --do_lower_case \ 21 | --max_seq_length 512 \ 22 | --per_gpu_train_batch_size 12 \ 23 | --learning_rate 1e-5 \ 24 | --num_train_epochs 40 \ 25 | --data_dir $DATA_PATH \ 26 | --model_type bertmultilabelclaimattnmeanwordlayerattnadapter \ 27 | --model_name_or_path $MODEL_NAME \ 28 | --output_dir $OUTPUT_PATH/$MODEL_NAME-$TASK_NAME-finetune-$RANDOM-$N_CLAIM_ATTN_HEADS-$SEED/ \ 29 | --gradient_accumulation_steps 2 \ 30 | --seed 
$SEED \ 31 | --n_claim_attn_heads $N_CLAIM_ATTN_HEADS -------------------------------------------------------------------------------- /modeling/scripts/aggrefact-inference-finegrainfact-model.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # Evaluate FactCC model 3 | 4 | # UPDATE PATHS BEFORE RUNNING SCRIPT 5 | export CUDA_VISIBLE_DEVICES=0 6 | export CODE_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/modeling # absolute path to modeling directory 7 | export DATA_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/data/aggrefact-deduplicated-final-test # absolute path to data directory 8 | export TASK_NAME=aggrefact_multi_label_claim_attn_annotated_with_entire_sent_four_label 9 | export N_CLAIM_ATTN_HEADS=16 10 | export CKPT_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/model_ckpts/aggrefact-multi-label-claim-attn-mil-cos-sim-finetune-adapter-four-label/bert-base-uncased-aggrefact_multi_label_claim_attn_annotated_with_entire_sent_four_label-finetune-7643/checkpoint-29 11 | 12 | # run inference 13 | python3 $CODE_PATH/run_new.py \ 14 | --task_name $TASK_NAME \ 15 | --do_eval \ 16 | --do_lower_case \ 17 | --overwrite_cache \ 18 | --max_seq_length 512 \ 19 | --per_gpu_train_batch_size 12 \ 20 | --model_type bertmultilabelclaimattnmeanwordlayerattnadapter \ 21 | --model_name_or_path $CKPT_PATH \ 22 | --data_dir $DATA_PATH \ 23 | --output_dir $CKPT_PATH \ 24 | --n_claim_attn_heads $N_CLAIM_ATTN_HEADS \ 25 | --export_output 26 | # compute evaluation scores 27 | python3 evaluate_multi_label_classification_scores.py \ 28 | --model_output_file $CKPT_PATH/model_outputs.jsonl \ 29 | --src_jsonl_file $DATA_PATH/data-dev.jsonl \ 30 | --is_entire -------------------------------------------------------------------------------- /modeling/scripts/fever2-inference-finegrainfact-model.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | # Evaluate FactCC model 3 | 4 | # UPDATE PATHS BEFORE RUNNING SCRIPT 5 | export CUDA_VISIBLE_DEVICES=0 6 | export CODE_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/modeling # absolute path to modeling directory 7 | export DATA_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/data/fever2 # absolute path to the directory that contains the data-dev.jsonl of fever2 8 | export CKPT_PATH=/shared/nas/data/users/hpchan/projects/fineGrainedFact/model_ckpts/aggrefact-multi-label-claim-attn-mean-word-layer-attn-finetune-adapter-four-label-dedup/bert-base-uncased-aggrefact_multi_label_claim_attn_annotated_with_entire_sent_four_label-finetune-11803-16-2000/checkpoint-37 9 | export TASK_NAME=aggrefact_multi_label_claim_attn_annotated_with_entire_sent_four_label 10 | export N_CLAIM_ATTN_HEADS=16 11 | 12 | #mkdir $CKPT_PATH/fever2 13 | EVAL_OUTPUT_DIR=$CKPT_PATH/fever2 14 | python3 $CODE_PATH/run_new.py \ 15 | --task_name $TASK_NAME \ 16 | --do_eval \ 17 | --do_lower_case \ 18 | --overwrite_cache \ 19 | --max_seq_length 512 \ 20 | --per_gpu_train_batch_size 12 \ 21 | --model_type bertmultilabelclaimattnmeanwordlayerattnadapter \ 22 | --model_name_or_path $CKPT_PATH \ 23 | --data_dir $DATA_PATH \ 24 | --output_dir $CKPT_PATH \ 25 | --eval_output_dir $EVAL_OUTPUT_DIR \ 26 | --n_claim_attn_heads $N_CLAIM_ATTN_HEADS \ 27 | --export_output 28 | python3 compute_interpretation_scores.py \ 29 | --model_output_file $EVAL_OUTPUT_DIR/model_outputs.jsonl \ 30 | --src_jsonl_file $DATA_PATH/data-dev.jsonl 31 | -------------------------------------------------------------------------------- /modeling/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenchan0226/FineGrainedFact/3d96823657fd23ff1091fc6e972c66a40222ec97/modeling/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- 
/modeling/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /modeling/transformers/commands/download.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from argparse import ArgumentParser 16 | 17 | from . import BaseTransformersCLICommand 18 | 19 | 20 | def download_command_factory(args): 21 | return DownloadCommand(args.model, args.cache_dir, args.force) 22 | 23 | 24 | class DownloadCommand(BaseTransformersCLICommand): 25 | @staticmethod 26 | def register_subcommand(parser: ArgumentParser): 27 | download_parser = parser.add_parser("download") 28 | download_parser.add_argument( 29 | "--cache-dir", type=str, default=None, help="Path to location to store the models" 30 | ) 31 | download_parser.add_argument( 32 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir" 33 | ) 34 | download_parser.add_argument("model", type=str, help="Name of the model to download") 35 | download_parser.set_defaults(func=download_command_factory) 36 | 37 | def __init__(self, model: str, cache: str, force: bool): 38 | self._model = model 39 | self._cache = cache 40 | self._force = force 41 | 42 | def run(self): 43 | from ..models.auto import AutoModel, AutoTokenizer 44 | 45 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 46 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 47 | -------------------------------------------------------------------------------- /modeling/transformers/commands/transformers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .add_new_model import AddNewModelCommand 19 | from .add_new_model_like import AddNewModelLikeCommand 20 | from .convert import ConvertCommand 21 | from .download import DownloadCommand 22 | from .env import EnvironmentCommand 23 | from .lfs import LfsCommands 24 | from .run import RunCommand 25 | from .serving import ServeCommand 26 | from .user import UserCommands 27 | 28 | 29 | def main(): 30 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli []") 31 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers") 32 | 33 | # Register commands 34 | ConvertCommand.register_subcommand(commands_parser) 35 | DownloadCommand.register_subcommand(commands_parser) 36 | EnvironmentCommand.register_subcommand(commands_parser) 37 | RunCommand.register_subcommand(commands_parser) 38 | ServeCommand.register_subcommand(commands_parser) 39 | UserCommands.register_subcommand(commands_parser) 40 | AddNewModelCommand.register_subcommand(commands_parser) 41 | AddNewModelLikeCommand.register_subcommand(commands_parser) 42 | LfsCommands.register_subcommand(commands_parser) 43 | 44 | # Let's go 45 | args = parser.parse_args() 46 | 47 | if not hasattr(args, "func"): 48 | parser.print_help() 49 | exit(1) 50 | 51 | # Run 52 | service = args.func(args) 53 | service.run() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /modeling/transformers/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .data_collator import ( 20 | DataCollatorForLanguageModeling, 21 | DataCollatorForPermutationLanguageModeling, 22 | DataCollatorForSeq2Seq, 23 | DataCollatorForSOP, 24 | DataCollatorForTokenClassification, 25 | DataCollatorForWholeWordMask, 26 | DataCollatorWithPadding, 27 | DefaultDataCollator, 28 | default_data_collator, 29 | ) 30 | from .metrics import glue_compute_metrics, xnli_compute_metrics 31 | from .processors import ( 32 | DataProcessor, 33 | InputExample, 34 | InputFeatures, 35 | SingleSentenceClassificationProcessor, 36 | SquadExample, 37 | SquadFeatures, 38 | SquadV1Processor, 39 | SquadV2Processor, 40 | glue_convert_examples_to_features, 41 | glue_output_modes, 42 | glue_processors, 43 | glue_tasks_num_labels, 44 | squad_convert_examples_to_features, 45 | xnli_output_modes, 46 | xnli_processors, 47 | xnli_tasks_num_labels, 48 | ) 49 | -------------------------------------------------------------------------------- /modeling/transformers/data/datasets/__init__.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import GlueDataset, GlueDataTrainingArguments 20 | from .language_modeling import ( 21 | LineByLineTextDataset, 22 | LineByLineWithRefDataset, 23 | LineByLineWithSOPTextDataset, 24 | TextDataset, 25 | TextDatasetForNextSentencePrediction, 26 | ) 27 | from .squad import SquadDataset, SquadDataTrainingArguments 28 | -------------------------------------------------------------------------------- /modeling/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 20 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 21 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 22 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 23 | -------------------------------------------------------------------------------- /modeling/transformers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex sacremoses requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /modeling/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert ALBERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import torch 21 | 22 | from transformers import AlbertConfig, AlbertForPreTraining, load_tf_weights_in_albert 23 | from transformers.utils import logging 24 | 25 | 26 | logging.set_verbosity_info() 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = AlbertConfig.from_json_file(albert_config_file) 32 | print(f"Building PyTorch model from configuration: {config}") 33 | model = AlbertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_albert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print(f"Save PyTorch model to {pytorch_dump_path}") 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--albert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained ALBERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.albert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/barthez/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_sentencepiece_available, is_tokenizers_available 22 | 23 | 24 | _import_structure = {} 25 | 26 | if is_sentencepiece_available(): 27 | _import_structure["tokenization_barthez"] = ["BarthezTokenizer"] 28 | 29 | if is_tokenizers_available(): 30 | _import_structure["tokenization_barthez_fast"] = ["BarthezTokenizerFast"] 31 | 32 | 33 | if TYPE_CHECKING: 34 | 35 | if is_sentencepiece_available(): 36 | from .tokenization_barthez import BarthezTokenizer 37 | 38 | if is_tokenizers_available(): 39 | from .tokenization_barthez_fast import BarthezTokenizerFast 40 | 41 | else: 42 | import sys 43 | 44 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 45 | -------------------------------------------------------------------------------- /modeling/transformers/models/bartpho/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_sentencepiece_available 22 | 23 | 24 | _import_structure = {} 25 | 26 | if is_sentencepiece_available(): 27 | _import_structure["tokenization_bartpho"] = ["BartphoTokenizer"] 28 | 29 | if TYPE_CHECKING: 30 | if is_sentencepiece_available(): 31 | from .tokenization_bartpho import BartphoTokenizer 32 | 33 | else: 34 | import sys 35 | 36 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 37 | -------------------------------------------------------------------------------- /modeling/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert BERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import torch 21 | 22 | from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert 23 | from transformers.utils import logging 24 | 25 | 26 | logging.set_verbosity_info() 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = BertConfig.from_json_file(bert_config_file) 32 | print(f"Building PyTorch model from configuration: {config}") 33 | model = BertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_bert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print(f"Save PyTorch model to {pytorch_dump_path}") 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--bert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained BERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.bert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/bert_adapter/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. 
So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_tokenizers_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_bert": ["BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "BertConfig", "BertOnnxConfig"], 26 | "tokenization_bert": ["BasicTokenizer", "BertTokenizer", "WordpieceTokenizer"], 27 | } 28 | 29 | if is_tokenizers_available(): 30 | _import_structure["tokenization_bert_fast"] = ["BertTokenizerFast"] 31 | 32 | if is_torch_available(): 33 | _import_structure["modeling_bert"] = [ 34 | "BERT_PRETRAINED_MODEL_ARCHIVE_LIST", 35 | "BertForMaskedLM", 36 | "BertForMultipleChoice", 37 | "BertForNextSentencePrediction", 38 | "BertForPreTraining", 39 | "BertForQuestionAnswering", 40 | "BertForSequenceClassification", 41 | "BertForTokenClassification", 42 | "BertLayer", 43 | "BertLMHeadModel", 44 | "BertAdapterModel", 45 | "BertPreTrainedModel", 46 | "load_tf_weights_in_bert", 47 | ] 48 | 49 | if TYPE_CHECKING: 50 | 51 | if is_torch_available(): 52 | from .modeling_bert import ( 53 | BertAdapterModel 54 | ) 55 | 56 | else: 57 | import sys 58 | 59 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 
-------------------------------------------------------------------------------- /modeling/transformers/models/bert_generation/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_sentencepiece_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_bert_generation": ["BertGenerationConfig"], 26 | } 27 | 28 | if is_sentencepiece_available(): 29 | _import_structure["tokenization_bert_generation"] = ["BertGenerationTokenizer"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_bert_generation"] = [ 33 | "BertGenerationDecoder", 34 | "BertGenerationEncoder", 35 | "BertGenerationPreTrainedModel", 36 | "load_tf_weights_in_bert_generation", 37 | ] 38 | 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_bert_generation import BertGenerationConfig 42 | 43 | if is_sentencepiece_available(): 44 | from .tokenization_bert_generation import BertGenerationTokenizer 45 | 46 | if is_torch_available(): 47 | from .modeling_bert_generation import ( 48 | BertGenerationDecoder, 49 | BertGenerationEncoder, 50 | BertGenerationPreTrainedModel, 51 | load_tf_weights_in_bert_generation, 52 | ) 53 | 54 | else: 55 | import sys 56 | 57 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 58 | -------------------------------------------------------------------------------- /modeling/transformers/models/bert_japanese/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...utils import _LazyModule


# This package ships tokenizers only; there are no config/model classes and
# no optional-dependency guards are needed here.
_import_structure = {
    "tokenization_bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"],
}


if TYPE_CHECKING:
    # Eager import for static analysis only.
    from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer

else:
    import sys

    # Runtime: defer the actual submodule import until first attribute access.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...utils import _LazyModule


# BERTweet ships only a (slow) tokenizer; no config or model classes here.
_import_structure = {
    "tokenization_bertweet": ["BertweetTokenizer"],
}


if TYPE_CHECKING:
    # Eager import for static analysis only.
    from .tokenization_bertweet import BertweetTokenizer

else:
    import sys

    # Runtime: install a lazy proxy that imports the submodule on demand.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_torch_available


# Submodule name -> public names; _LazyModule imports each submodule lazily.
_import_structure = {
    "configuration_bigbird_pegasus": ["BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdPegasusConfig"],
}

# Model classes require torch.
if is_torch_available():
    _import_structure["modeling_bigbird_pegasus"] = [
        "BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST",
        "BigBirdPegasusForCausalLM",
        "BigBirdPegasusForConditionalGeneration",
        "BigBirdPegasusForQuestionAnswering",
        "BigBirdPegasusForSequenceClassification",
        "BigBirdPegasusModel",
        "BigBirdPegasusPreTrainedModel",
    ]


if TYPE_CHECKING:
    # Eager imports for static type checkers.
    from .configuration_bigbird_pegasus import BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdPegasusConfig

    if is_torch_available():
        from .modeling_bigbird_pegasus import (
            BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST,
            BigBirdPegasusForCausalLM,
            BigBirdPegasusForConditionalGeneration,
            BigBirdPegasusForQuestionAnswering,
            BigBirdPegasusForSequenceClassification,
            BigBirdPegasusModel,
            BigBirdPegasusPreTrainedModel,
        )


else:
    import sys

    # Runtime: install the lazy module proxy.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...utils import _LazyModule


# ByT5 only adds a byte-level tokenizer; model classes are shared with T5.
_import_structure = {
    "tokenization_byt5": ["ByT5Tokenizer"],
}


if TYPE_CHECKING:
    # Eager import for static analysis only.
    from .tokenization_byt5 import ByT5Tokenizer
else:
    import sys

    # Runtime: lazy proxy imports the tokenizer on first attribute access.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert T5 checkpoint."""


import argparse

from transformers import T5Config, T5ForConditionalGeneration, load_tf_weights_in_t5
from transformers.utils import logging


logging.set_verbosity_info()


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
    """Materialise a T5 PyTorch model from a TensorFlow checkpoint.

    Builds a ``T5ForConditionalGeneration`` from the JSON config at
    ``config_file``, copies the weights found at ``tf_checkpoint_path`` into
    it, and saves the result under ``pytorch_dump_path``.
    """
    # Instantiate an (untrained) PyTorch model from the architecture config.
    cfg = T5Config.from_json_file(config_file)
    print(f"Building PyTorch model from configuration: {cfg}")
    pt_model = T5ForConditionalGeneration(cfg)

    # Overwrite its parameters with the TensorFlow checkpoint values.
    load_tf_weights_in_t5(pt_model, cfg, tf_checkpoint_path)

    # Persist weights + config in the standard HF layout.
    print(f"Save PyTorch model to {pytorch_dump_path}")
    pt_model.save_pretrained(pytorch_dump_path)


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    # Required parameters
    arg_parser.add_argument(
        "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path."
    )
    arg_parser.add_argument(
        "--config_file",
        default=None,
        type=str,
        required=True,
        help="The config json file corresponding to the pre-trained T5 model. \n"
        "This specifies the model architecture.",
    )
    arg_parser.add_argument(
        "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
    )
    cli_args = arg_parser.parse_args()
    convert_tf_checkpoint_to_pytorch(cli_args.tf_checkpoint_path, cli_args.config_file, cli_args.pytorch_dump_path)
    Please check the superclass for the appropriate documentation alongside
    usage examples.
    """

    # CamemBERT reuses RoBERTa's configuration wholesale; only the registry
    # key used for auto-class lookup differs.
    model_type = "camembert"


class CamembertOnnxConfig(OnnxConfig):
    """ONNX export configuration for CamemBERT: declares the graph inputs and
    which of their axes are dynamic."""

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        # Both inputs have dynamic batch (axis 0) and sequence (axis 1) dims.
        return OrderedDict(
            [
                ("input_ids", {0: "batch", 1: "sequence"}),
                ("attention_mask", {0: "batch", 1: "sequence"}),
            ]
        )
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_tokenizers_available, is_torch_available


# Submodule name -> public names; _LazyModule imports each submodule lazily.
# The tokenizer is pure Python, so it is listed unconditionally.
_import_structure = {
    "configuration_canine": ["CANINE_PRETRAINED_CONFIG_ARCHIVE_MAP", "CanineConfig"],
    "tokenization_canine": ["CanineTokenizer"],
}

# Model classes require torch.
if is_torch_available():
    _import_structure["modeling_canine"] = [
        "CANINE_PRETRAINED_MODEL_ARCHIVE_LIST",
        "CanineForMultipleChoice",
        "CanineForQuestionAnswering",
        "CanineForSequenceClassification",
        "CanineForTokenClassification",
        "CanineLayer",
        "CanineModel",
        "CaninePreTrainedModel",
        "load_tf_weights_in_canine",
    ]


if TYPE_CHECKING:
    # Eager imports for static type checkers.
    from .configuration_canine import CANINE_PRETRAINED_CONFIG_ARCHIVE_MAP, CanineConfig
    from .tokenization_canine import CanineTokenizer

    if is_torch_available():
        from .modeling_canine import (
            CANINE_PRETRAINED_MODEL_ARCHIVE_LIST,
            CanineForMultipleChoice,
            CanineForQuestionAnswering,
            CanineForSequenceClassification,
            CanineForTokenClassification,
            CanineLayer,
            CanineModel,
            CaninePreTrainedModel,
            load_tf_weights_in_canine,
        )


else:
    import sys

    # Runtime: install the lazy module proxy.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert CANINE checkpoint."""


import argparse

from transformers import CanineConfig, CanineModel, CanineTokenizer, load_tf_weights_in_canine
from transformers.utils import logging


logging.set_verbosity_info()


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, pytorch_dump_path):
    """Convert a CANINE TF checkpoint to a PyTorch model + tokenizer dump.

    CANINE has a single canonical configuration, so no config file argument
    is taken; weights are loaded from ``tf_checkpoint_path`` and the model
    and tokenizer files are written to ``pytorch_dump_path``.
    """
    # Build the PyTorch model with the default architecture and put it in
    # inference mode before copying weights.
    cfg = CanineConfig()
    pt_model = CanineModel(cfg)
    pt_model.eval()

    print(f"Building PyTorch model from configuration: {cfg}")

    # Copy the TensorFlow checkpoint weights into the PyTorch module.
    load_tf_weights_in_canine(pt_model, cfg, tf_checkpoint_path)

    # Persist weights and configuration.
    print(f"Save PyTorch model to {pytorch_dump_path}")
    pt_model.save_pretrained(pytorch_dump_path)

    # The tokenizer needs no vocab files; write its files next to the model.
    tok = CanineTokenizer()
    print(f"Save tokenizer files to {pytorch_dump_path}")
    tok.save_pretrained(pytorch_dump_path)


if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    # Required parameters
    cli.add_argument(
        "--tf_checkpoint_path",
        default=None,
        type=str,
        required=True,
        help="Path to the TensorFlow checkpoint. Should end with model.ckpt",
    )
    cli.add_argument(
        "--pytorch_dump_path",
        default=None,
        type=str,
        required=True,
        help="Path to a folder where the PyTorch model will be placed.",
    )
    ns = cli.parse_args()
    convert_tf_checkpoint_to_pytorch(ns.tf_checkpoint_path, ns.pytorch_dump_path)
15 | """Convert ConvBERT checkpoint.""" 16 | 17 | import argparse 18 | 19 | from transformers import ConvBertConfig, ConvBertModel, TFConvBertModel, load_tf_weights_in_convbert 20 | from transformers.utils import logging 21 | 22 | 23 | logging.set_verbosity_info() 24 | 25 | 26 | def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path, convbert_config_file, pytorch_dump_path): 27 | conf = ConvBertConfig.from_json_file(convbert_config_file) 28 | model = ConvBertModel(conf) 29 | 30 | model = load_tf_weights_in_convbert(model, conf, tf_checkpoint_path) 31 | model.save_pretrained(pytorch_dump_path) 32 | 33 | tf_model = TFConvBertModel.from_pretrained(pytorch_dump_path, from_pt=True) 34 | tf_model.save_pretrained(pytorch_dump_path) 35 | 36 | 37 | if __name__ == "__main__": 38 | parser = argparse.ArgumentParser() 39 | # Required parameters 40 | parser.add_argument( 41 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 42 | ) 43 | parser.add_argument( 44 | "--convbert_config_file", 45 | default=None, 46 | type=str, 47 | required=True, 48 | help="The config json file corresponding to the pre-trained ConvBERT model. \n" 49 | "This specifies the model architecture.", 50 | ) 51 | parser.add_argument( 52 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 53 | ) 54 | args = parser.parse_args() 55 | convert_orig_tf1_checkpoint_to_pytorch(args.tf_checkpoint_path, args.convbert_config_file, args.pytorch_dump_path) 56 | -------------------------------------------------------------------------------- /modeling/transformers/models/convbert/tokenization_convbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for ConvBERT."""
from ...utils import logging
from ..bert.tokenization_bert import BertTokenizer


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

# Hub locations of the vocab files for the published ConvBERT checkpoints.
PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt",
        "YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt",
        "YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt",
    }
}

# Maximum input length (positional-embedding size) for each checkpoint.
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
    "YituTech/conv-bert-base": 512,
    "YituTech/conv-bert-medium-small": 512,
    "YituTech/conv-bert-small": 512,
}


# Default tokenizer kwargs per checkpoint: all published models are uncased.
PRETRAINED_INIT_CONFIGURATION = {
    "YituTech/conv-bert-base": {"do_lower_case": True},
    "YituTech/conv-bert-medium-small": {"do_lower_case": True},
    "YituTech/conv-bert-small": {"do_lower_case": True},
}


class ConvBertTokenizer(BertTokenizer):
    r"""
    Construct a ConvBERT tokenizer. [`ConvBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
    tokenization: punctuation splitting and wordpiece. Refer to superclass [`BertTokenizer`] for usage examples and
    documentation concerning parameters.
    """

    # Only the checkpoint metadata differs from BertTokenizer; the tokenization
    # algorithm itself is inherited unchanged.
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
15 | """Tokenization classes for ConvBERT.""" 16 | from ...utils import logging 17 | from ..bert.tokenization_bert_fast import BertTokenizerFast 18 | from .tokenization_convbert import ConvBertTokenizer 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 24 | 25 | PRETRAINED_VOCAB_FILES_MAP = { 26 | "vocab_file": { 27 | "YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt", 28 | "YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt", 29 | "YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt", 30 | } 31 | } 32 | 33 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 34 | "YituTech/conv-bert-base": 512, 35 | "YituTech/conv-bert-medium-small": 512, 36 | "YituTech/conv-bert-small": 512, 37 | } 38 | 39 | 40 | PRETRAINED_INIT_CONFIGURATION = { 41 | "YituTech/conv-bert-base": {"do_lower_case": True}, 42 | "YituTech/conv-bert-medium-small": {"do_lower_case": True}, 43 | "YituTech/conv-bert-small": {"do_lower_case": True}, 44 | } 45 | 46 | 47 | class ConvBertTokenizerFast(BertTokenizerFast): 48 | r""" 49 | Construct a "fast" ConvBERT tokenizer (backed by HuggingFace's *tokenizers* library). 50 | 51 | [`ConvBertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs end-to-end tokenization: punctuation 52 | splitting and wordpiece. 53 | 54 | Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning parameters. 
55 | """ 56 | vocab_files_names = VOCAB_FILES_NAMES 57 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 58 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 59 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 60 | slow_tokenizer_class = ConvBertTokenizer 61 | -------------------------------------------------------------------------------- /modeling/transformers/models/convnext/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
from typing import TYPE_CHECKING

# rely on isort to merge the imports
from ...utils import _LazyModule, is_tf_available, is_torch_available, is_vision_available


# Submodule name -> public names; _LazyModule imports each submodule lazily,
# so the heavy torch/TF/vision code is only loaded on first attribute access.
_import_structure = {
    "configuration_convnext": ["CONVNEXT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ConvNextConfig"],
}

# Feature extractor requires the optional vision dependency.
if is_vision_available():
    _import_structure["feature_extraction_convnext"] = ["ConvNextFeatureExtractor"]

# PyTorch model classes require torch.
if is_torch_available():
    _import_structure["modeling_convnext"] = [
        "CONVNEXT_PRETRAINED_MODEL_ARCHIVE_LIST",
        "ConvNextForImageClassification",
        "ConvNextModel",
        "ConvNextPreTrainedModel",
    ]

# TensorFlow model classes require TF.
if is_tf_available():
    _import_structure["modeling_tf_convnext"] = [
        "TFConvNextForImageClassification",
        "TFConvNextModel",
        "TFConvNextPreTrainedModel",
    ]

if TYPE_CHECKING:
    # Eager imports for static type checkers.
    from .configuration_convnext import CONVNEXT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvNextConfig

    if is_vision_available():
        from .feature_extraction_convnext import ConvNextFeatureExtractor

    if is_torch_available():
        from .modeling_convnext import (
            CONVNEXT_PRETRAINED_MODEL_ARCHIVE_LIST,
            ConvNextForImageClassification,
            ConvNextModel,
            ConvNextPreTrainedModel,
        )

    if is_tf_available():
        # Fix: the TF classes live in modeling_tf_convnext (as registered in
        # _import_structure above), not in the torch module modeling_convnext.
        from .modeling_tf_convnext import TFConvNextForImageClassification, TFConvNextModel, TFConvNextPreTrainedModel


else:
    import sys

    # Pass module_spec for consistency with the sibling model packages so the
    # lazy proxy keeps correct importlib metadata.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)

# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_sentencepiece_available, is_tokenizers_available


# CPM ships tokenizers only; both variants sit behind optional dependencies,
# so the base import structure starts empty.
_import_structure = {}

# Slow tokenizer requires sentencepiece.
if is_sentencepiece_available():
    _import_structure["tokenization_cpm"] = ["CpmTokenizer"]

# Fast tokenizer requires the tokenizers library.
if is_tokenizers_available():
    _import_structure["tokenization_cpm_fast"] = ["CpmTokenizerFast"]


if TYPE_CHECKING:
    # Eager imports for static type checkers.
    if is_sentencepiece_available():
        from .tokenization_cpm import CpmTokenizer

    if is_tokenizers_available():
        from .tokenization_cpm_fast import CpmTokenizerFast

else:
    import sys

    # Runtime: install the lazy module proxy.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

# rely on isort to merge the imports
from ...utils import _LazyModule, is_torch_available


# Submodule name -> public names; imported lazily by _LazyModule.
_import_structure = {
    "configuration_decision_transformer": [
        "DECISION_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
        "DecisionTransformerConfig",
    ],
}

# Model classes require torch.
if is_torch_available():
    _import_structure["modeling_decision_transformer"] = [
        "DECISION_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
        "DecisionTransformerGPT2Model",
        "DecisionTransformerGPT2PreTrainedModel",
        "DecisionTransformerModel",
        "DecisionTransformerPreTrainedModel",
    ]


if TYPE_CHECKING:
    # Eager imports for static type checkers.
    from .configuration_decision_transformer import (
        DECISION_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
        DecisionTransformerConfig,
    )

    if is_torch_available():
        from .modeling_decision_transformer import (
            DECISION_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
            DecisionTransformerGPT2Model,
            DecisionTransformerGPT2PreTrainedModel,
            DecisionTransformerModel,
            DecisionTransformerPreTrainedModel,
        )


else:
    import sys

    # Runtime: install the lazy module proxy.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_torch_available, is_vision_available


# Submodule name -> public names; imported lazily via _LazyModule.
_import_structure = {
    "configuration_deit": ["DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "DeiTConfig"],
}

# Feature extractor requires the optional vision dependency.
if is_vision_available():
    _import_structure["feature_extraction_deit"] = ["DeiTFeatureExtractor"]

# Model classes require torch.
if is_torch_available():
    _import_structure["modeling_deit"] = [
        "DEIT_PRETRAINED_MODEL_ARCHIVE_LIST",
        "DeiTForImageClassification",
        "DeiTForImageClassificationWithTeacher",
        "DeiTForMaskedImageModeling",
        "DeiTModel",
        "DeiTPreTrainedModel",
    ]


if TYPE_CHECKING:
    # Eager imports for static type checkers.
    from .configuration_deit import DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP, DeiTConfig

    if is_vision_available():
        from .feature_extraction_deit import DeiTFeatureExtractor

    if is_torch_available():
        from .modeling_deit import (
            DEIT_PRETRAINED_MODEL_ARCHIVE_LIST,
            DeiTForImageClassification,
            DeiTForImageClassificationWithTeacher,
            DeiTForMaskedImageModeling,
            DeiTModel,
            DeiTPreTrainedModel,
        )


else:
    import sys

    # Runtime: install the lazy module proxy.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_timm_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_detr": ["DETR_PRETRAINED_CONFIG_ARCHIVE_MAP", "DetrConfig"], 26 | } 27 | 28 | if is_vision_available(): 29 | _import_structure["feature_extraction_detr"] = ["DetrFeatureExtractor"] 30 | 31 | if is_timm_available(): 32 | _import_structure["modeling_detr"] = [ 33 | "DETR_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "DetrForObjectDetection", 35 | "DetrForSegmentation", 36 | "DetrModel", 37 | "DetrPreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_detr import DETR_PRETRAINED_CONFIG_ARCHIVE_MAP, DetrConfig 43 | 44 | if is_vision_available(): 45 | from .feature_extraction_detr import DetrFeatureExtractor 46 | 47 | if is_timm_available(): 48 | from .modeling_detr import ( 49 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST, 50 | DetrForObjectDetection, 51 | DetrForSegmentation, 52 | DetrModel, 53 | DetrPreTrainedModel, 54 | ) 55 | 56 | else: 57 | import sys 58 | 59 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/dialogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenchan0226/FineGrainedFact/3d96823657fd23ff1091fc6e972c66a40222ec97/modeling/transformers/models/dialogpt/__init__.py -------------------------------------------------------------------------------- /modeling/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import os 17 | 18 | import torch 19 | 20 | from transformers.utils import WEIGHTS_NAME 21 | 22 | 23 | DIALOGPT_MODELS = ["small", "medium", "large"] 24 | 25 | OLD_KEY = "lm_head.decoder.weight" 26 | NEW_KEY = "lm_head.weight" 27 | 28 | 29 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str): 30 | d = torch.load(checkpoint_path) 31 | d[NEW_KEY] = d.pop(OLD_KEY) 32 | os.makedirs(pytorch_dump_folder_path, exist_ok=True) 33 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)) 34 | 35 | 36 | if __name__ == "__main__": 37 | parser = argparse.ArgumentParser() 38 | parser.add_argument("--dialogpt_path", default=".", type=str) 39 | args = parser.parse_args() 40 | for MODEL in DIALOGPT_MODELS: 41 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") 42 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" 43 | convert_dialogpt_checkpoint( 44 | checkpoint_path, 45 | pytorch_dump_folder_path, 46 | ) 47 | -------------------------------------------------------------------------------- /modeling/transformers/models/dit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenchan0226/FineGrainedFact/3d96823657fd23ff1091fc6e972c66a40222ec97/modeling/transformers/models/dit/__init__.py 
-------------------------------------------------------------------------------- /modeling/transformers/models/dpt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | from ...file_utils import _LazyModule, is_tokenizers_available, is_torch_available, is_vision_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_dpt": ["DPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "DPTConfig"], 25 | } 26 | 27 | if is_vision_available(): 28 | _import_structure["feature_extraction_dpt"] = ["DPTFeatureExtractor"] 29 | 30 | if is_torch_available(): 31 | _import_structure["modeling_dpt"] = [ 32 | "DPT_PRETRAINED_MODEL_ARCHIVE_LIST", 33 | "DPTForDepthEstimation", 34 | "DPTForSemanticSegmentation", 35 | "DPTModel", 36 | "DPTPreTrainedModel", 37 | ] 38 | 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_dpt import DPT_PRETRAINED_CONFIG_ARCHIVE_MAP, DPTConfig 42 | 43 | if is_vision_available(): 44 | from .feature_extraction_dpt import DPTFeatureExtractor 45 | 46 | if is_torch_available(): 47 | from .modeling_dpt import ( 48 | DPT_PRETRAINED_MODEL_ARCHIVE_LIST, 49 | DPTForDepthEstimation, 50 | DPTForSemanticSegmentation, 51 | DPTModel, 52 | DPTPreTrainedModel, 53 | ) 54 | 55 | 56 | else: 57 | import sys 58 | 59 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_encoder_decoder": ["EncoderDecoderConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_encoder_decoder"] = ["EncoderDecoderModel"] 30 | 31 | if is_tf_available(): 32 | _import_structure["modeling_tf_encoder_decoder"] = ["TFEncoderDecoderModel"] 33 | 34 | if is_flax_available(): 35 | _import_structure["modeling_flax_encoder_decoder"] = ["FlaxEncoderDecoderModel"] 36 | 37 | if TYPE_CHECKING: 38 | from .configuration_encoder_decoder import EncoderDecoderConfig 39 | 40 | if is_torch_available(): 41 | from .modeling_encoder_decoder import EncoderDecoderModel 42 | 43 | if is_tf_available(): 44 | from .modeling_tf_encoder_decoder import TFEncoderDecoderModel 45 | 46 | if is_flax_available(): 47 | from .modeling_flax_encoder_decoder import FlaxEncoderDecoderModel 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 53 | -------------------------------------------------------------------------------- /modeling/transformers/models/fsmt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 
4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_fsmt": ["FSMT_PRETRAINED_CONFIG_ARCHIVE_MAP", "FSMTConfig"], 26 | "tokenization_fsmt": ["FSMTTokenizer"], 27 | } 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_fsmt"] = ["FSMTForConditionalGeneration", "FSMTModel", "PretrainedFSMTModel"] 31 | 32 | 33 | if TYPE_CHECKING: 34 | from .configuration_fsmt import FSMT_PRETRAINED_CONFIG_ARCHIVE_MAP, FSMTConfig 35 | from .tokenization_fsmt import FSMTTokenizer 36 | 37 | if is_torch_available(): 38 | from .modeling_fsmt import FSMTForConditionalGeneration, FSMTModel, PretrainedFSMTModel 39 | 40 | else: 41 | import sys 42 | 43 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 44 | -------------------------------------------------------------------------------- /modeling/transformers/models/glpn/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_glpn": ["GLPN_PRETRAINED_CONFIG_ARCHIVE_MAP", "GLPNConfig"], 26 | } 27 | 28 | if is_vision_available(): 29 | _import_structure["feature_extraction_glpn"] = ["GLPNFeatureExtractor"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_glpn"] = [ 33 | "GLPN_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "GLPNForDepthEstimation", 35 | "GLPNLayer", 36 | "GLPNModel", 37 | "GLPNPreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_glpn import GLPN_PRETRAINED_CONFIG_ARCHIVE_MAP, GLPNConfig 43 | 44 | if is_vision_available(): 45 | from .feature_extraction_glpn import GLPNFeatureExtractor 46 | 47 | if is_torch_available(): 48 | from .modeling_glpn import ( 49 | GLPN_PRETRAINED_MODEL_ARCHIVE_LIST, 50 | GLPNForDepthEstimation, 51 | GLPNLayer, 52 | GLPNModel, 53 | GLPNPreTrainedModel, 54 | ) 55 | 56 | 57 | else: 58 | import sys 59 | 60 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 61 | -------------------------------------------------------------------------------- /modeling/transformers/models/gpt_neo/__init__.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_flax_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_gpt_neo": ["GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP", "GPTNeoConfig", "GPTNeoOnnxConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_gpt_neo"] = [ 29 | "GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "GPTNeoForCausalLM", 31 | "GPTNeoForSequenceClassification", 32 | "GPTNeoModel", 33 | "GPTNeoPreTrainedModel", 34 | "load_tf_weights_in_gpt_neo", 35 | ] 36 | 37 | if is_flax_available(): 38 | _import_structure["modeling_flax_gpt_neo"] = [ 39 | "FlaxGPTNeoForCausalLM", 40 | "FlaxGPTNeoModel", 41 | "FlaxGPTNeoPreTrainedModel", 42 | ] 43 | 44 | 45 | if TYPE_CHECKING: 46 | from .configuration_gpt_neo import GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP, GPTNeoConfig, GPTNeoOnnxConfig 47 | 48 | if is_torch_available(): 49 | from .modeling_gpt_neo import ( 50 | GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST, 51 | GPTNeoForCausalLM, 52 | GPTNeoForSequenceClassification, 53 | 
GPTNeoModel, 54 | GPTNeoPreTrainedModel, 55 | load_tf_weights_in_gpt_neo, 56 | ) 57 | 58 | if is_flax_available(): 59 | from .modeling_flax_gpt_neo import FlaxGPTNeoForCausalLM, FlaxGPTNeoModel, FlaxGPTNeoPreTrainedModel 60 | 61 | 62 | else: 63 | import sys 64 | 65 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 66 | -------------------------------------------------------------------------------- /modeling/transformers/models/herbert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_tokenizers_available 22 | 23 | 24 | _import_structure = { 25 | "tokenization_herbert": ["HerbertTokenizer"], 26 | } 27 | 28 | if is_tokenizers_available(): 29 | _import_structure["tokenization_herbert_fast"] = ["HerbertTokenizerFast"] 30 | 31 | 32 | if TYPE_CHECKING: 33 | from .tokenization_herbert import HerbertTokenizer 34 | 35 | if is_tokenizers_available(): 36 | from .tokenization_herbert_fast import HerbertTokenizerFast 37 | 38 | else: 39 | import sys 40 | 41 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 42 | -------------------------------------------------------------------------------- /modeling/transformers/models/ibert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_ibert": ["IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "IBertConfig", "IBertOnnxConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_ibert"] = [ 30 | "IBERT_PRETRAINED_MODEL_ARCHIVE_LIST", 31 | "IBertForMaskedLM", 32 | "IBertForMultipleChoice", 33 | "IBertForQuestionAnswering", 34 | "IBertForSequenceClassification", 35 | "IBertForTokenClassification", 36 | "IBertModel", 37 | "IBertPreTrainedModel", 38 | ] 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_ibert import IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, IBertConfig, IBertOnnxConfig 42 | 43 | if is_torch_available(): 44 | from .modeling_ibert import ( 45 | IBERT_PRETRAINED_MODEL_ARCHIVE_LIST, 46 | IBertForMaskedLM, 47 | IBertForMultipleChoice, 48 | IBertForQuestionAnswering, 49 | IBertForSequenceClassification, 50 | IBertForTokenClassification, 51 | IBertModel, 52 | IBertPreTrainedModel, 53 | ) 54 | 55 | else: 56 | import sys 57 | 58 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 59 | -------------------------------------------------------------------------------- /modeling/transformers/models/imagegpt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_imagegpt": ["IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ImageGPTConfig"], 26 | } 27 | 28 | if is_vision_available(): 29 | _import_structure["feature_extraction_imagegpt"] = ["ImageGPTFeatureExtractor"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_imagegpt"] = [ 33 | "IMAGEGPT_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "ImageGPTForCausalImageModeling", 35 | "ImageGPTForImageClassification", 36 | "ImageGPTModel", 37 | "ImageGPTPreTrainedModel", 38 | "load_tf_weights_in_imagegpt", 39 | ] 40 | 41 | 42 | if TYPE_CHECKING: 43 | from .configuration_imagegpt import IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP, ImageGPTConfig 44 | 45 | if is_vision_available(): 46 | from .feature_extraction_imagegpt import ImageGPTFeatureExtractor 47 | 48 | if is_torch_available(): 49 | from .modeling_imagegpt import ( 50 | IMAGEGPT_PRETRAINED_MODEL_ARCHIVE_LIST, 51 | ImageGPTForCausalImageModeling, 52 | ImageGPTForImageClassification, 53 | ImageGPTModel, 54 | ImageGPTPreTrainedModel, 55 | load_tf_weights_in_imagegpt, 56 | ) 57 | 58 | else: 59 | import sys 60 | 61 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/layoutlm/tokenization_layoutlm.py: 
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Microsoft Research Asia LayoutLM Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Tokenization class for model LayoutLM.""" 16 | 17 | 18 | from ...utils import logging 19 | from ..bert.tokenization_bert import BertTokenizer 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 25 | 26 | PRETRAINED_VOCAB_FILES_MAP = { 27 | "vocab_file": { 28 | "microsoft/layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/vocab.txt", 29 | "microsoft/layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/vocab.txt", 30 | } 31 | } 32 | 33 | 34 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 35 | "microsoft/layoutlm-base-uncased": 512, 36 | "microsoft/layoutlm-large-uncased": 512, 37 | } 38 | 39 | 40 | PRETRAINED_INIT_CONFIGURATION = { 41 | "microsoft/layoutlm-base-uncased": {"do_lower_case": True}, 42 | "microsoft/layoutlm-large-uncased": {"do_lower_case": True}, 43 | } 44 | 45 | 46 | class LayoutLMTokenizer(BertTokenizer): 47 | r""" 48 | Constructs a LayoutLM tokenizer. 49 | 50 | [`LayoutLMTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end tokenization: punctuation splitting + 51 | wordpiece. 
52 | 53 | Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning parameters. 54 | """ 55 | 56 | vocab_files_names = VOCAB_FILES_NAMES 57 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 58 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 59 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/layoutxlm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import ( 22 | _LazyModule, 23 | is_sentencepiece_available, 24 | is_tokenizers_available, 25 | is_torch_available, 26 | is_vision_available, 27 | ) 28 | 29 | 30 | _import_structure = {} 31 | 32 | if is_sentencepiece_available(): 33 | _import_structure["tokenization_layoutxlm"] = ["LayoutXLMTokenizer"] 34 | 35 | if is_tokenizers_available(): 36 | _import_structure["tokenization_layoutxlm_fast"] = ["LayoutXLMTokenizerFast"] 37 | 38 | if is_vision_available(): 39 | _import_structure["processing_layoutxlm"] = ["LayoutXLMProcessor"] 40 | 41 | if TYPE_CHECKING: 42 | if is_sentencepiece_available(): 43 | from .tokenization_layoutxlm import LayoutXLMTokenizer 44 | 45 | if is_tokenizers_available(): 46 | from .tokenization_layoutxlm_fast import LayoutXLMTokenizerFast 47 | 48 | if is_vision_available(): 49 | from .processing_layoutlmv2 import LayoutXLMProcessor 50 | 51 | else: 52 | import sys 53 | 54 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 55 | -------------------------------------------------------------------------------- /modeling/transformers/models/luke/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_luke": ["LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP", "LukeConfig"], 26 | "tokenization_luke": ["LukeTokenizer"], 27 | } 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_luke"] = [ 31 | "LUKE_PRETRAINED_MODEL_ARCHIVE_LIST", 32 | "LukeForEntityClassification", 33 | "LukeForEntityPairClassification", 34 | "LukeForEntitySpanClassification", 35 | "LukeForMaskedLM", 36 | "LukeModel", 37 | "LukePreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig 43 | from .tokenization_luke import LukeTokenizer 44 | 45 | if is_torch_available(): 46 | from .modeling_luke import ( 47 | LUKE_PRETRAINED_MODEL_ARCHIVE_LIST, 48 | LukeForEntityClassification, 49 | LukeForEntityPairClassification, 50 | LukeForEntitySpanClassification, 51 | LukeForMaskedLM, 52 | LukeModel, 53 | LukePreTrainedModel, 54 | ) 55 | 56 | else: 57 | import sys 58 | 59 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert LXMERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import torch 21 | 22 | from transformers import LxmertConfig, LxmertForPreTraining, load_tf_weights_in_lxmert 23 | from transformers.utils import logging 24 | 25 | 26 | logging.set_verbosity_info() 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = LxmertConfig.from_json_file(config_file) 32 | print(f"Building PyTorch model from configuration: {config}") 33 | model = LxmertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_lxmert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print(f"Save PyTorch model to {pytorch_dump_path}") 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained model. 
\n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/lxmert/tokenization_lxmert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Team, Stanford University and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from ..bert.tokenization_bert import BertTokenizer 17 | 18 | 19 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 20 | 21 | PRETRAINED_VOCAB_FILES_MAP = { 22 | "vocab_file": { 23 | "unc-nlp/lxmert-base-uncased": "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/vocab.txt", 24 | } 25 | } 26 | 27 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 28 | "unc-nlp/lxmert-base-uncased": 512, 29 | } 30 | 31 | PRETRAINED_INIT_CONFIGURATION = { 32 | "unc-nlp/lxmert-base-uncased": {"do_lower_case": True}, 33 | } 34 | 35 | 36 | class LxmertTokenizer(BertTokenizer): 37 | r""" 38 | Construct an LXMERT tokenizer. 
39 | 40 | [`LxmertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end tokenization: punctuation splitting and 41 | wordpiece. 42 | 43 | Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning parameters. 44 | """ 45 | 46 | vocab_files_names = VOCAB_FILES_NAMES 47 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 48 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 49 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 50 | -------------------------------------------------------------------------------- /modeling/transformers/models/lxmert/tokenization_lxmert_fast.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Team, Stanford University and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from ..bert.tokenization_bert_fast import BertTokenizerFast 17 | from .tokenization_lxmert import LxmertTokenizer 18 | 19 | 20 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"} 21 | 22 | PRETRAINED_VOCAB_FILES_MAP = { 23 | "vocab_file": { 24 | "unc-nlp/lxmert-base-uncased": "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/vocab.txt", 25 | }, 26 | "tokenizer_file": { 27 | "unc-nlp/lxmert-base-uncased": "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/tokenizer.json", 28 | }, 29 | } 30 | 31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 32 | "unc-nlp/lxmert-base-uncased": 512, 33 | } 34 | 35 | PRETRAINED_INIT_CONFIGURATION = { 36 | "unc-nlp/lxmert-base-uncased": {"do_lower_case": True}, 37 | } 38 | 39 | 40 | class LxmertTokenizerFast(BertTokenizerFast): 41 | r""" 42 | Construct a "fast" LXMERT tokenizer (backed by HuggingFace's *tokenizers* library). 43 | 44 | [`LxmertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs end-to-end tokenization: punctuation 45 | splitting and wordpiece. 46 | 47 | Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning parameters. 48 | """ 49 | vocab_files_names = VOCAB_FILES_NAMES 50 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 51 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 52 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 53 | slow_tokenizer_class = LxmertTokenizer 54 | -------------------------------------------------------------------------------- /modeling/transformers/models/m2m_100/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_tokenizers_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_m2m_100": ["M2M_100_PRETRAINED_CONFIG_ARCHIVE_MAP", "M2M100Config", "M2M100OnnxConfig"], 25 | "tokenization_m2m_100": ["M2M100Tokenizer"], 26 | } 27 | 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_m2m_100"] = [ 31 | "M2M_100_PRETRAINED_MODEL_ARCHIVE_LIST", 32 | "M2M100ForConditionalGeneration", 33 | "M2M100Model", 34 | "M2M100PreTrainedModel", 35 | ] 36 | 37 | 38 | if TYPE_CHECKING: 39 | from .configuration_m2m_100 import M2M_100_PRETRAINED_CONFIG_ARCHIVE_MAP, M2M100Config, M2M100OnnxConfig 40 | from .tokenization_m2m_100 import M2M100Tokenizer 41 | 42 | if is_torch_available(): 43 | from .modeling_m2m_100 import ( 44 | M2M_100_PRETRAINED_MODEL_ARCHIVE_LIST, 45 | M2M100ForConditionalGeneration, 46 | M2M100Model, 47 | M2M100PreTrainedModel, 48 | ) 49 | 50 | 51 | else: 52 | import sys 53 | 54 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 55 | -------------------------------------------------------------------------------- /modeling/transformers/models/maskformer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' 
imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available, is_vision_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_maskformer": ["MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "MaskFormerConfig"], 25 | } 26 | 27 | if is_vision_available(): 28 | _import_structure["feature_extraction_maskformer"] = ["MaskFormerFeatureExtractor"] 29 | 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_maskformer"] = [ 33 | "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "MaskFormerForInstanceSegmentation", 35 | "MaskFormerModel", 36 | "MaskFormerPreTrainedModel", 37 | ] 38 | 39 | if TYPE_CHECKING: 40 | from .configuration_maskformer import MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, MaskFormerConfig 41 | 42 | if is_vision_available(): 43 | from .feature_extraction_maskformer import MaskFormerFeatureExtractor 44 | if is_torch_available(): 45 | from .modeling_maskformer import ( 46 | MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, 47 | MaskFormerForInstanceSegmentation, 48 | MaskFormerModel, 49 | MaskFormerPreTrainedModel, 50 | ) 51 | 52 | 53 | else: 54 | import sys 55 | 56 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], 
_import_structure) 57 | -------------------------------------------------------------------------------- /modeling/transformers/models/mbart50/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_sentencepiece_available, is_tokenizers_available 21 | 22 | 23 | _import_structure = {} 24 | 25 | if is_sentencepiece_available(): 26 | _import_structure["tokenization_mbart50"] = ["MBart50Tokenizer"] 27 | 28 | if is_tokenizers_available(): 29 | _import_structure["tokenization_mbart50_fast"] = ["MBart50TokenizerFast"] 30 | 31 | 32 | if TYPE_CHECKING: 33 | if is_sentencepiece_available(): 34 | from .tokenization_mbart50 import MBart50Tokenizer 35 | 36 | if is_tokenizers_available(): 37 | from .tokenization_mbart50_fast import MBart50TokenizerFast 38 | 39 | else: 40 | import sys 41 | 42 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 43 | -------------------------------------------------------------------------------- /modeling/transformers/models/megatron_gpt2/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 NVIDIA Corporation and The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | -------------------------------------------------------------------------------- /modeling/transformers/models/mluke/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_sentencepiece_available 22 | 23 | 24 | _import_structure = {} 25 | 26 | 27 | if is_sentencepiece_available(): 28 | _import_structure["tokenization_mluke"] = ["MLukeTokenizer"] 29 | 30 | if TYPE_CHECKING: 31 | if is_sentencepiece_available(): 32 | from .tokenization_mluke import MLukeTokenizer 33 | 34 | 35 | else: 36 | import sys 37 | 38 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 39 | -------------------------------------------------------------------------------- /modeling/transformers/models/mmbt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. 
So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_mmbt": ["MMBTConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_mmbt"] = ["MMBTForClassification", "MMBTModel", "ModalEmbeddings"] 30 | 31 | 32 | if TYPE_CHECKING: 33 | from .configuration_mmbt import MMBTConfig 34 | 35 | if is_torch_available(): 36 | from .modeling_mmbt import MMBTForClassification, MMBTModel, ModalEmbeddings 37 | 38 | else: 39 | import sys 40 | 41 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 42 | -------------------------------------------------------------------------------- /modeling/transformers/models/mmbt/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ MMBT configuration""" 17 | 18 | from ...utils import logging 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | 24 | class MMBTConfig(object): 25 | """ 26 | This is the configuration class to store the configuration of a [`MMBTModel`]. It is used to instantiate a MMBT 27 | model according to the specified arguments, defining the model architecture. 28 | 29 | Args: 30 | config ([`PreTrainedConfig`]): 31 | Config of the underlying Transformer models. Its values are copied over to use a single config. 32 | num_labels (`int`, *optional*): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (`int`, *optional*, defaults to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /modeling/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | import torch 18 | 19 | from transformers import MobileBertConfig, MobileBertForPreTraining, load_tf_weights_in_mobilebert 20 | from transformers.utils import logging 21 | 22 | 23 | logging.set_verbosity_info() 24 | 25 | 26 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, mobilebert_config_file, pytorch_dump_path): 27 | # Initialise PyTorch model 28 | config = MobileBertConfig.from_json_file(mobilebert_config_file) 29 | print(f"Building PyTorch model from configuration: {config}") 30 | model = MobileBertForPreTraining(config) 31 | # Load weights from tf checkpoint 32 | model = load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path) 33 | # Save pytorch-model 34 | print(f"Save PyTorch model to {pytorch_dump_path}") 35 | torch.save(model.state_dict(), pytorch_dump_path) 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = argparse.ArgumentParser() 40 | # Required parameters 41 | parser.add_argument( 42 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 43 | ) 44 | parser.add_argument( 45 | "--mobilebert_config_file", 46 | default=None, 47 | type=str, 48 | required=True, 49 | help="The config json file corresponding to the pre-trained MobileBERT model. \n" 50 | "This specifies the model architecture.", 51 | ) 52 | parser.add_argument( 53 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
54 | ) 55 | args = parser.parse_args() 56 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.mobilebert_config_file, args.pytorch_dump_path) 57 | -------------------------------------------------------------------------------- /modeling/transformers/models/mobilebert/tokenization_mobilebert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Copyright 2020 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Tokenization classes for MobileBERT.""" 17 | 18 | from ...utils import logging 19 | from ..bert.tokenization_bert import BertTokenizer 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 25 | 26 | PRETRAINED_VOCAB_FILES_MAP = { 27 | "vocab_file": {"mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt"} 28 | } 29 | 30 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"mobilebert-uncased": 512} 31 | 32 | 33 | PRETRAINED_INIT_CONFIGURATION = {} 34 | 35 | 36 | class MobileBertTokenizer(BertTokenizer): 37 | r""" 38 | Construct a MobileBERT tokenizer. 39 | 40 | [`MobileBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end tokenization: punctuation splitting 41 | and wordpiece. 42 | 43 | Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning parameters. 
44 | """ 45 | 46 | vocab_files_names = VOCAB_FILES_NAMES 47 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 48 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 49 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 50 | -------------------------------------------------------------------------------- /modeling/transformers/models/mobilebert/tokenization_mobilebert_fast.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Copyright 2020 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """Tokenization classes for MobileBERT.""" 17 | 18 | from ...utils import logging 19 | from ..bert.tokenization_bert_fast import BertTokenizerFast 20 | from .tokenization_mobilebert import MobileBertTokenizer 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"} 26 | 27 | PRETRAINED_VOCAB_FILES_MAP = { 28 | "vocab_file": {"mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt"}, 29 | "tokenizer_file": { 30 | "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/tokenizer.json" 31 | }, 32 | } 33 | 34 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"mobilebert-uncased": 512} 35 | 36 | 37 | PRETRAINED_INIT_CONFIGURATION = {} 38 | 39 | 40 | class MobileBertTokenizerFast(BertTokenizerFast): 41 | r""" 42 | Construct a "fast" MobileBERT tokenizer (backed by HuggingFace's *tokenizers* library). 43 | 44 | [`MobileBertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs end-to-end tokenization: punctuation 45 | splitting and wordpiece. 46 | 47 | Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning parameters. 48 | """ 49 | 50 | vocab_files_names = VOCAB_FILES_NAMES 51 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 52 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 53 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 54 | slow_tokenizer_class = MobileBertTokenizer 55 | -------------------------------------------------------------------------------- /modeling/transformers/models/nystromformer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. 
All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_tokenizers_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_nystromformer": ["NYSTROMFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "NystromformerConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_nystromformer"] = [ 30 | "NYSTROMFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", 31 | "NystromformerForMaskedLM", 32 | "NystromformerForMultipleChoice", 33 | "NystromformerForQuestionAnswering", 34 | "NystromformerForSequenceClassification", 35 | "NystromformerForTokenClassification", 36 | "NystromformerLayer", 37 | "NystromformerModel", 38 | "NystromformerPreTrainedModel", 39 | ] 40 | 41 | 42 | if TYPE_CHECKING: 43 | from .configuration_nystromformer import NYSTROMFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, NystromformerConfig 44 | 45 | if is_torch_available(): 46 | from .modeling_nystromformer import ( 47 | NYSTROMFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, 48 | NystromformerForMaskedLM, 49 | NystromformerForMultipleChoice, 50 | NystromformerForQuestionAnswering, 51 | NystromformerForSequenceClassification, 52 | NystromformerForTokenClassification, 53 | NystromformerLayer, 54 | NystromformerModel, 55 | NystromformerPreTrainedModel, 56 | ) 57 | 58 | 59 | else: 60 | import sys 61 | 62 | 
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 63 | -------------------------------------------------------------------------------- /modeling/transformers/models/phobert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule 22 | 23 | 24 | _import_structure = { 25 | "tokenization_phobert": ["PhobertTokenizer"], 26 | } 27 | 28 | 29 | if TYPE_CHECKING: 30 | from .tokenization_phobert import PhobertTokenizer 31 | 32 | else: 33 | import sys 34 | 35 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 36 | -------------------------------------------------------------------------------- /modeling/transformers/models/plbart/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 

# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_sentencepiece_available, is_tokenizers_available, is_torch_available


_import_structure = {
    "configuration_plbart": ["PLBART_PRETRAINED_CONFIG_ARCHIVE_MAP", "PLBartConfig"],
}

if is_sentencepiece_available():
    _import_structure["tokenization_plbart"] = ["PLBartTokenizer"]

if is_torch_available():
    _import_structure["modeling_plbart"] = [
        "PLBART_PRETRAINED_MODEL_ARCHIVE_LIST",
        "PLBartForCausalLM",
        "PLBartForConditionalGeneration",
        "PLBartForSequenceClassification",
        "PLBartModel",
        "PLBartPreTrainedModel",
    ]


if TYPE_CHECKING:
    from .configuration_plbart import PLBART_PRETRAINED_CONFIG_ARCHIVE_MAP, PLBartConfig

    if is_sentencepiece_available():
        from .tokenization_plbart import PLBartTokenizer

    if is_torch_available():
        from .modeling_plbart import (
            PLBART_PRETRAINED_MODEL_ARCHIVE_LIST,
            PLBartForCausalLM,
            PLBartForConditionalGeneration,
            PLBartForSequenceClassification,
            PLBartModel,
            PLBartPreTrainedModel,
        )


else:
    import sys

    # Pass module_spec=__spec__ so the lazy module carries the real module spec,
    # consistent with the sibling model __init__ files (m2m_100, mbart50, ...).
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)

-------------------------------------------------------------------------------- /modeling/transformers/models/poolformer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_poolformer": ["POOLFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "PoolFormerConfig"], 26 | } 27 | 28 | if is_vision_available(): 29 | _import_structure["feature_extraction_poolformer"] = ["PoolFormerFeatureExtractor"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_poolformer"] = [ 33 | "POOLFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "PoolFormerForImageClassification", 35 | "PoolFormerModel", 36 | "PoolFormerPreTrainedModel", 37 | ] 38 | 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_poolformer import POOLFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, PoolFormerConfig 42 | 43 | if is_vision_available(): 44 | from .feature_extraction_poolformer import PoolFormerFeatureExtractor 45 | 46 | if is_torch_available(): 47 | from .modeling_poolformer import ( 48 | POOLFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, 49 | PoolFormerForImageClassification, 50 | PoolFormerModel, 51 | PoolFormerPreTrainedModel, 52 | ) 53 | 54 | 55 | else: 56 | import sys 57 | 58 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 59 | -------------------------------------------------------------------------------- /modeling/transformers/models/prophetnet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_prophetnet": ["PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "ProphetNetConfig"], 26 | "tokenization_prophetnet": ["ProphetNetTokenizer"], 27 | } 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_prophetnet"] = [ 31 | "PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST", 32 | "ProphetNetDecoder", 33 | "ProphetNetEncoder", 34 | "ProphetNetForCausalLM", 35 | "ProphetNetForConditionalGeneration", 36 | "ProphetNetModel", 37 | "ProphetNetPreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_prophetnet import PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP, ProphetNetConfig 43 | from .tokenization_prophetnet import ProphetNetTokenizer 44 | 45 | if is_torch_available(): 46 | from .modeling_prophetnet import ( 47 | PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST, 48 | ProphetNetDecoder, 49 | ProphetNetEncoder, 50 | ProphetNetForCausalLM, 51 | ProphetNetForConditionalGeneration, 52 | ProphetNetModel, 53 | ProphetNetPreTrainedModel, 54 | ) 55 | 56 | else: 57 | import sys 58 | 59 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/qdqbert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore 
"F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 NVIDIA Corporation and The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_qdqbert": ["QDQBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "QDQBertConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_qdqbert"] = [ 29 | "QDQBERT_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "QDQBertForMaskedLM", 31 | "QDQBertForMultipleChoice", 32 | "QDQBertForNextSentencePrediction", 33 | "QDQBertForQuestionAnswering", 34 | "QDQBertForSequenceClassification", 35 | "QDQBertForTokenClassification", 36 | "QDQBertLayer", 37 | "QDQBertLMHeadModel", 38 | "QDQBertModel", 39 | "QDQBertPreTrainedModel", 40 | "load_tf_weights_in_qdqbert", 41 | ] 42 | 43 | 44 | if TYPE_CHECKING: 45 | from .configuration_qdqbert import QDQBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, QDQBertConfig 46 | 47 | if is_torch_available(): 48 | from .modeling_qdqbert import ( 49 | QDQBERT_PRETRAINED_MODEL_ARCHIVE_LIST, 50 | QDQBertForMaskedLM, 51 | QDQBertForMultipleChoice, 52 | QDQBertForNextSentencePrediction, 53 | QDQBertForQuestionAnswering, 54 | QDQBertForSequenceClassification, 55 | QDQBertForTokenClassification, 56 | 
QDQBertLayer, 57 | QDQBertLMHeadModel, 58 | QDQBertModel, 59 | QDQBertPreTrainedModel, 60 | load_tf_weights_in_qdqbert, 61 | ) 62 | 63 | 64 | else: 65 | import sys 66 | 67 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 68 | -------------------------------------------------------------------------------- /modeling/transformers/models/rag/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_tf_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_rag": ["RagConfig"], 26 | "retrieval_rag": ["RagRetriever"], 27 | "tokenization_rag": ["RagTokenizer"], 28 | } 29 | 30 | if is_torch_available(): 31 | _import_structure["modeling_rag"] = [ 32 | "RagModel", 33 | "RagPreTrainedModel", 34 | "RagSequenceForGeneration", 35 | "RagTokenForGeneration", 36 | ] 37 | 38 | if is_tf_available(): 39 | _import_structure["modeling_tf_rag"] = [ 40 | "TFRagModel", 41 | "TFRagPreTrainedModel", 42 | "TFRagSequenceForGeneration", 43 | "TFRagTokenForGeneration", 44 | ] 45 | 46 | 47 | if TYPE_CHECKING: 48 | from .configuration_rag import RagConfig 49 | from .retrieval_rag import RagRetriever 50 | from .tokenization_rag import RagTokenizer 51 | 52 | if is_torch_available(): 53 | from .modeling_rag import RagModel, RagPreTrainedModel, RagSequenceForGeneration, RagTokenForGeneration 54 | 55 | if is_tf_available(): 56 | from .modeling_tf_rag import ( 57 | TFRagModel, 58 | TFRagPreTrainedModel, 59 | TFRagSequenceForGeneration, 60 | TFRagTokenForGeneration, 61 | ) 62 | 63 | else: 64 | import sys 65 | 66 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 67 | -------------------------------------------------------------------------------- /modeling/transformers/models/realm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_tokenizers_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_realm": ["REALM_PRETRAINED_CONFIG_ARCHIVE_MAP", "RealmConfig"], 25 | "tokenization_realm": ["RealmTokenizer"], 26 | } 27 | 28 | if is_tokenizers_available(): 29 | _import_structure["tokenization_realm_fast"] = ["RealmTokenizerFast"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_realm"] = [ 33 | "REALM_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "RealmEmbedder", 35 | "RealmForOpenQA", 36 | "RealmKnowledgeAugEncoder", 37 | "RealmPreTrainedModel", 38 | "RealmReader", 39 | "RealmScorer", 40 | "load_tf_weights_in_realm", 41 | ] 42 | _import_structure["retrieval_realm"] = ["RealmRetriever"] 43 | 44 | 45 | if TYPE_CHECKING: 46 | from .configuration_realm import REALM_PRETRAINED_CONFIG_ARCHIVE_MAP, RealmConfig 47 | from .tokenization_realm import RealmTokenizer 48 | 49 | if is_tokenizers_available(): 50 | from .tokenization_realm_fast import RealmTokenizerFast 51 | 52 | if is_torch_available(): 53 | from .modeling_realm import ( 54 | REALM_PRETRAINED_MODEL_ARCHIVE_LIST, 55 | RealmEmbedder, 56 | RealmForOpenQA, 57 | RealmKnowledgeAugEncoder, 58 | RealmPreTrainedModel, 59 | RealmReader, 60 | RealmScorer, 61 | load_tf_weights_in_realm, 62 | ) 63 | from .retrieval_realm import RealmRetriever 64 | 65 | 66 | else: 67 | import sys 68 | 69 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 
70 | -------------------------------------------------------------------------------- /modeling/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert RemBERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import torch 21 | 22 | from transformers import RemBertConfig, RemBertModel, load_tf_weights_in_rembert 23 | from transformers.utils import logging 24 | 25 | 26 | logging.set_verbosity_info() 27 | 28 | 29 | def convert_rembert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = RemBertConfig.from_json_file(bert_config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = RemBertModel(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_rembert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the 
TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--rembert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained RemBERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_rembert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.rembert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_resnet": ["RESNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "ResNetConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_resnet"] = [ 30 | "RESNET_PRETRAINED_MODEL_ARCHIVE_LIST", 31 | "ResNetForImageClassification", 32 | "ResNetModel", 33 | "ResNetPreTrainedModel", 34 | ] 35 | 36 | 37 | if TYPE_CHECKING: 38 | from .configuration_resnet import RESNET_PRETRAINED_CONFIG_ARCHIVE_MAP, ResNetConfig 39 | 40 | if is_torch_available(): 41 | from .modeling_resnet import ( 42 | RESNET_PRETRAINED_MODEL_ARCHIVE_LIST, 43 | ResNetForImageClassification, 44 | ResNetModel, 45 | ResNetPreTrainedModel, 46 | ) 47 | 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 53 | -------------------------------------------------------------------------------- /modeling/transformers/models/retribert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_tokenizers_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_retribert": ["RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "RetriBertConfig"], 26 | "tokenization_retribert": ["RetriBertTokenizer"], 27 | } 28 | 29 | if is_tokenizers_available(): 30 | _import_structure["tokenization_retribert_fast"] = ["RetriBertTokenizerFast"] 31 | 32 | if is_torch_available(): 33 | _import_structure["modeling_retribert"] = [ 34 | "RETRIBERT_PRETRAINED_MODEL_ARCHIVE_LIST", 35 | "RetriBertModel", 36 | "RetriBertPreTrainedModel", 37 | ] 38 | 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_retribert import RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, RetriBertConfig 42 | from .tokenization_retribert import RetriBertTokenizer 43 | 44 | if is_tokenizers_available(): 45 | from .tokenization_retribert_fast import RetriBertTokenizerFast 46 | 47 | if is_torch_available(): 48 | from .modeling_retribert import ( 49 | RETRIBERT_PRETRAINED_MODEL_ARCHIVE_LIST, 50 | RetriBertModel, 51 | RetriBertPreTrainedModel, 52 | ) 53 | 54 | else: 55 | import sys 56 | 57 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 58 | -------------------------------------------------------------------------------- /modeling/transformers/models/retribert/tokenization_retribert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Tokenization classes for RetriBERT.""" 16 | 17 | from ...utils import logging 18 | from ..bert.tokenization_bert import BertTokenizer 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 24 | 25 | PRETRAINED_VOCAB_FILES_MAP = { 26 | "vocab_file": { 27 | "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt", 28 | } 29 | } 30 | 31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 32 | "yjernite/retribert-base-uncased": 512, 33 | } 34 | 35 | 36 | PRETRAINED_INIT_CONFIGURATION = { 37 | "yjernite/retribert-base-uncased": {"do_lower_case": True}, 38 | } 39 | 40 | 41 | class RetriBertTokenizer(BertTokenizer): 42 | r""" 43 | Constructs a RetriBERT tokenizer. 44 | 45 | [`RetriBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end tokenization: punctuation splitting 46 | and wordpiece. 47 | 48 | Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning parameters. 
49 | """ 50 | 51 | vocab_files_names = VOCAB_FILES_NAMES 52 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 53 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 54 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 55 | model_input_names = ["input_ids", "attention_mask"] 56 | -------------------------------------------------------------------------------- /modeling/transformers/models/retribert/tokenization_retribert_fast.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for RetriBERT.""" 16 | 17 | from ...utils import logging 18 | from ..bert.tokenization_bert_fast import BertTokenizerFast 19 | from .tokenization_retribert import RetriBertTokenizer 20 | 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"} 25 | 26 | PRETRAINED_VOCAB_FILES_MAP = { 27 | "vocab_file": { 28 | "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt", 29 | }, 30 | "tokenizer_file": { 31 | "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/tokenizer.json", 32 | }, 33 | } 34 | 35 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 36 | "yjernite/retribert-base-uncased": 512, 37 | } 38 | 39 | 40 | PRETRAINED_INIT_CONFIGURATION = { 41 | "yjernite/retribert-base-uncased": {"do_lower_case": True}, 42 | } 43 | 44 | 45 | class RetriBertTokenizerFast(BertTokenizerFast): 46 | r""" 47 | Construct a "fast" RetriBERT tokenizer (backed by HuggingFace's *tokenizers* library). 48 | 49 | [`RetriBertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs end-to-end tokenization: punctuation 50 | splitting and wordpiece. 51 | 52 | Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning parameters. 
53 | """ 54 | 55 | vocab_files_names = VOCAB_FILES_NAMES 56 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 57 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 58 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 59 | slow_tokenizer_class = RetriBertTokenizer 60 | model_input_names = ["input_ids", "attention_mask"] 61 | -------------------------------------------------------------------------------- /modeling/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert RoFormer checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import torch 21 | 22 | from transformers import RoFormerConfig, RoFormerForMaskedLM, load_tf_weights_in_roformer 23 | from transformers.utils import logging 24 | 25 | 26 | logging.set_verbosity_info() 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = RoFormerConfig.from_json_file(bert_config_file) 32 | print(f"Building PyTorch model from configuration: {config}") 33 | model = RoFormerForMaskedLM(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_roformer(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print(f"Save PyTorch model to {pytorch_dump_path}") 40 | torch.save(model.state_dict(), pytorch_dump_path, _use_new_zipfile_serialization=False) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--bert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained BERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.bert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/segformer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' 
imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available, is_vision_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_segformer": ["SEGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "SegformerConfig"], 25 | } 26 | 27 | if is_vision_available(): 28 | _import_structure["feature_extraction_segformer"] = ["SegformerFeatureExtractor"] 29 | 30 | if is_torch_available(): 31 | _import_structure["modeling_segformer"] = [ 32 | "SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", 33 | "SegformerDecodeHead", 34 | "SegformerForImageClassification", 35 | "SegformerForSemanticSegmentation", 36 | "SegformerLayer", 37 | "SegformerModel", 38 | "SegformerPreTrainedModel", 39 | ] 40 | 41 | 42 | if TYPE_CHECKING: 43 | from .configuration_segformer import SEGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, SegformerConfig 44 | 45 | if is_vision_available(): 46 | from .feature_extraction_segformer import SegformerFeatureExtractor 47 | 48 | if is_torch_available(): 49 | from .modeling_segformer import ( 50 | SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, 51 | SegformerDecodeHead, 52 | SegformerForImageClassification, 53 | SegformerForSemanticSegmentation, 54 | SegformerLayer, 55 | 
SegformerModel, 56 | SegformerPreTrainedModel, 57 | ) 58 | 59 | 60 | else: 61 | import sys 62 | 63 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 64 | -------------------------------------------------------------------------------- /modeling/transformers/models/sew/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_sew": ["SEW_PRETRAINED_CONFIG_ARCHIVE_MAP", "SEWConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_sew"] = [ 29 | "SEW_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "SEWForCTC", 31 | "SEWForSequenceClassification", 32 | "SEWModel", 33 | "SEWPreTrainedModel", 34 | ] 35 | 36 | if TYPE_CHECKING: 37 | from .configuration_sew import SEW_PRETRAINED_CONFIG_ARCHIVE_MAP, SEWConfig 38 | 39 | if is_torch_available(): 40 | from .modeling_sew import ( 41 | SEW_PRETRAINED_MODEL_ARCHIVE_LIST, 42 | SEWForCTC, 43 | SEWForSequenceClassification, 44 | SEWModel, 45 | SEWPreTrainedModel, 46 | ) 47 | 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 53 | -------------------------------------------------------------------------------- /modeling/transformers/models/sew_d/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_sew_d": ["SEW_D_PRETRAINED_CONFIG_ARCHIVE_MAP", "SEWDConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_sew_d"] = [ 29 | "SEW_D_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "SEWDForCTC", 31 | "SEWDForSequenceClassification", 32 | "SEWDModel", 33 | "SEWDPreTrainedModel", 34 | ] 35 | 36 | if TYPE_CHECKING: 37 | from .configuration_sew_d import SEW_D_PRETRAINED_CONFIG_ARCHIVE_MAP, SEWDConfig 38 | 39 | if is_torch_available(): 40 | from .modeling_sew_d import ( 41 | SEW_D_PRETRAINED_MODEL_ARCHIVE_LIST, 42 | SEWDForCTC, 43 | SEWDForSequenceClassification, 44 | SEWDModel, 45 | SEWDPreTrainedModel, 46 | ) 47 | 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 53 | -------------------------------------------------------------------------------- /modeling/transformers/models/speech_encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_flax_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_speech_encoder_decoder": ["SpeechEncoderDecoderConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_speech_encoder_decoder"] = ["SpeechEncoderDecoderModel"] 30 | 31 | if is_flax_available(): 32 | _import_structure["modeling_flax_speech_encoder_decoder"] = ["FlaxSpeechEncoderDecoderModel"] 33 | 34 | if TYPE_CHECKING: 35 | from .configuration_speech_encoder_decoder import SpeechEncoderDecoderConfig 36 | 37 | if is_torch_available(): 38 | from .modeling_speech_encoder_decoder import SpeechEncoderDecoderModel 39 | 40 | if is_flax_available(): 41 | from .modeling_flax_speech_encoder_decoder import FlaxSpeechEncoderDecoderModel 42 | 43 | else: 44 | import sys 45 | 46 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 47 | -------------------------------------------------------------------------------- /modeling/transformers/models/speech_to_text_2/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_sentencepiece_available, is_speech_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_speech_to_text_2": [ 25 | "SPEECH_TO_TEXT_2_PRETRAINED_CONFIG_ARCHIVE_MAP", 26 | "Speech2Text2Config", 27 | ], 28 | "processing_speech_to_text_2": ["Speech2Text2Processor"], 29 | "tokenization_speech_to_text_2": ["Speech2Text2Tokenizer"], 30 | } 31 | 32 | 33 | if is_torch_available(): 34 | _import_structure["modeling_speech_to_text_2"] = [ 35 | "SPEECH_TO_TEXT_2_PRETRAINED_MODEL_ARCHIVE_LIST", 36 | "Speech2Text2ForCausalLM", 37 | "Speech2Text2PreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_speech_to_text_2 import SPEECH_TO_TEXT_2_PRETRAINED_CONFIG_ARCHIVE_MAP, Speech2Text2Config 43 | from .processing_speech_to_text_2 import Speech2Text2Processor 44 | from .tokenization_speech_to_text_2 import Speech2Text2Tokenizer 45 | 46 | if is_torch_available(): 47 | from .modeling_speech_to_text_2 import ( 48 | SPEECH_TO_TEXT_2_PRETRAINED_MODEL_ARCHIVE_LIST, 49 | Speech2Text2ForCausalLM, 50 | Speech2Text2PreTrainedModel, 51 | ) 52 | 53 | else: 54 | import sys 55 | 56 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 57 | -------------------------------------------------------------------------------- /modeling/transformers/models/splinter/__init__.py: -------------------------------------------------------------------------------- 1 | # 
flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_tokenizers_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_splinter": ["SPLINTER_PRETRAINED_CONFIG_ARCHIVE_MAP", "SplinterConfig"], 25 | "tokenization_splinter": ["SplinterTokenizer"], 26 | } 27 | 28 | if is_tokenizers_available(): 29 | _import_structure["tokenization_splinter_fast"] = ["SplinterTokenizerFast"] 30 | 31 | if is_torch_available(): 32 | _import_structure["modeling_splinter"] = [ 33 | "SPLINTER_PRETRAINED_MODEL_ARCHIVE_LIST", 34 | "SplinterForQuestionAnswering", 35 | "SplinterLayer", 36 | "SplinterModel", 37 | "SplinterPreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_splinter import SPLINTER_PRETRAINED_CONFIG_ARCHIVE_MAP, SplinterConfig 43 | from .tokenization_splinter import SplinterTokenizer 44 | 45 | if is_tokenizers_available(): 46 | from .tokenization_splinter_fast import SplinterTokenizerFast 47 | 48 | if is_torch_available(): 49 | from .modeling_splinter import ( 50 | SPLINTER_PRETRAINED_MODEL_ARCHIVE_LIST, 51 | SplinterForQuestionAnswering, 52 | SplinterLayer, 53 
| SplinterModel, 54 | SplinterPreTrainedModel, 55 | ) 56 | 57 | 58 | else: 59 | import sys 60 | 61 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/squeezebert/tokenization_squeezebert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The SqueezeBert authors and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for SqueezeBERT.""" 16 | 17 | from ...utils import logging 18 | from ..bert.tokenization_bert import BertTokenizer 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 24 | 25 | PRETRAINED_VOCAB_FILES_MAP = { 26 | "vocab_file": { 27 | "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt", 28 | "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/vocab.txt", 29 | "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt", 30 | } 31 | } 32 | 33 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 34 | "squeezebert/squeezebert-uncased": 512, 35 | "squeezebert/squeezebert-mnli": 512, 36 | "squeezebert/squeezebert-mnli-headless": 512, 37 | } 38 | 39 | 40 | PRETRAINED_INIT_CONFIGURATION = { 41 | "squeezebert/squeezebert-uncased": {"do_lower_case": True}, 42 | "squeezebert/squeezebert-mnli": {"do_lower_case": True}, 43 | "squeezebert/squeezebert-mnli-headless": {"do_lower_case": True}, 44 | } 45 | 46 | 47 | class SqueezeBertTokenizer(BertTokenizer): 48 | r""" 49 | Constructs a SqueezeBert tokenizer. 50 | 51 | [`SqueezeBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end tokenization: punctuation splitting 52 | + wordpiece. 53 | 54 | Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning parameters. 
55 | """ 56 | 57 | vocab_files_names = VOCAB_FILES_NAMES 58 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 59 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 60 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 61 | -------------------------------------------------------------------------------- /modeling/transformers/models/swin/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_swin": ["SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP", "SwinConfig"], 26 | } 27 | 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_swin"] = [ 31 | "SWIN_PRETRAINED_MODEL_ARCHIVE_LIST", 32 | "SwinForImageClassification", 33 | "SwinForMaskedImageModeling", 34 | "SwinModel", 35 | "SwinPreTrainedModel", 36 | ] 37 | 38 | 39 | if TYPE_CHECKING: 40 | from .configuration_swin import SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP, SwinConfig 41 | 42 | if is_torch_available(): 43 | from .modeling_swin import ( 44 | SWIN_PRETRAINED_MODEL_ARCHIVE_LIST, 45 | SwinForImageClassification, 46 | SwinForMaskedImageModeling, 47 | SwinModel, 48 | SwinPreTrainedModel, 49 | ) 50 | 51 | 52 | else: 53 | import sys 54 | 55 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 56 | -------------------------------------------------------------------------------- /modeling/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The T5 authors and HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert T5 checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | from transformers import T5Config, T5ForConditionalGeneration, load_tf_weights_in_t5 21 | from transformers.utils import logging 22 | 23 | 24 | logging.set_verbosity_info() 25 | 26 | 27 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): 28 | # Initialise PyTorch model 29 | config = T5Config.from_json_file(config_file) 30 | print(f"Building PyTorch model from configuration: {config}") 31 | model = T5ForConditionalGeneration(config) 32 | 33 | # Load weights from tf checkpoint 34 | load_tf_weights_in_t5(model, config, tf_checkpoint_path) 35 | 36 | # Save pytorch-model 37 | print(f"Save PyTorch model to {pytorch_dump_path}") 38 | model.save_pretrained(pytorch_dump_path) 39 | 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser() 43 | # Required parameters 44 | parser.add_argument( 45 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 46 | ) 47 | parser.add_argument( 48 | "--config_file", 49 | default=None, 50 | type=str, 51 | required=True, 52 | help="The config json file corresponding to the pre-trained T5 model. \n" 53 | "This specifies the model architecture.", 54 | ) 55 | parser.add_argument( 56 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 57 | ) 58 | args = parser.parse_args() 59 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path) 60 | -------------------------------------------------------------------------------- /modeling/transformers/models/trocr/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 
4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_sentencepiece_available, is_speech_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_trocr": [ 25 | "TROCR_PRETRAINED_CONFIG_ARCHIVE_MAP", 26 | "TrOCRConfig", 27 | ], 28 | "processing_trocr": ["TrOCRProcessor"], 29 | } 30 | 31 | 32 | if is_torch_available(): 33 | _import_structure["modeling_trocr"] = [ 34 | "TROCR_PRETRAINED_MODEL_ARCHIVE_LIST", 35 | "TrOCRForCausalLM", 36 | "TrOCRPreTrainedModel", 37 | ] 38 | 39 | 40 | if TYPE_CHECKING: 41 | from .configuration_trocr import TROCR_PRETRAINED_CONFIG_ARCHIVE_MAP, TrOCRConfig 42 | from .processing_trocr import TrOCRProcessor 43 | 44 | if is_torch_available(): 45 | from .modeling_trocr import TROCR_PRETRAINED_MODEL_ARCHIVE_LIST, TrOCRForCausalLM, TrOCRPreTrainedModel 46 | 47 | else: 48 | import sys 49 | 50 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 51 | -------------------------------------------------------------------------------- /modeling/transformers/models/unispeech/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' 
imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_unispeech": ["UNISPEECH_PRETRAINED_CONFIG_ARCHIVE_MAP", "UniSpeechConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_unispeech"] = [ 29 | "UNISPEECH_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "UniSpeechForCTC", 31 | "UniSpeechForPreTraining", 32 | "UniSpeechForSequenceClassification", 33 | "UniSpeechModel", 34 | "UniSpeechPreTrainedModel", 35 | ] 36 | 37 | if TYPE_CHECKING: 38 | from .configuration_unispeech import UNISPEECH_PRETRAINED_CONFIG_ARCHIVE_MAP, UniSpeechConfig 39 | 40 | if is_torch_available(): 41 | from .modeling_unispeech import ( 42 | UNISPEECH_PRETRAINED_MODEL_ARCHIVE_LIST, 43 | UniSpeechForCTC, 44 | UniSpeechForPreTraining, 45 | UniSpeechForSequenceClassification, 46 | UniSpeechModel, 47 | UniSpeechPreTrainedModel, 48 | ) 49 | 50 | else: 51 | import sys 52 | 53 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 54 | -------------------------------------------------------------------------------- 
/modeling/transformers/models/unispeech_sat/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_unispeech_sat": ["UNISPEECH_SAT_PRETRAINED_CONFIG_ARCHIVE_MAP", "UniSpeechSatConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_unispeech_sat"] = [ 29 | "UNISPEECH_SAT_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "UniSpeechSatForAudioFrameClassification", 31 | "UniSpeechSatForCTC", 32 | "UniSpeechSatForPreTraining", 33 | "UniSpeechSatForSequenceClassification", 34 | "UniSpeechSatForXVector", 35 | "UniSpeechSatModel", 36 | "UniSpeechSatPreTrainedModel", 37 | ] 38 | 39 | if TYPE_CHECKING: 40 | from .configuration_unispeech_sat import UNISPEECH_SAT_PRETRAINED_CONFIG_ARCHIVE_MAP, UniSpeechSatConfig 41 | 42 | if is_torch_available(): 43 | from .modeling_unispeech_sat import ( 44 | UNISPEECH_SAT_PRETRAINED_MODEL_ARCHIVE_LIST, 45 | UniSpeechSatForAudioFrameClassification, 46 | 
UniSpeechSatForCTC, 47 | UniSpeechSatForPreTraining, 48 | UniSpeechSatForSequenceClassification, 49 | UniSpeechSatForXVector, 50 | UniSpeechSatModel, 51 | UniSpeechSatPreTrainedModel, 52 | ) 53 | 54 | else: 55 | import sys 56 | 57 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 58 | -------------------------------------------------------------------------------- /modeling/transformers/models/van/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_van": ["VAN_PRETRAINED_CONFIG_ARCHIVE_MAP", "VanConfig"], 26 | } 27 | 28 | 29 | if is_torch_available(): 30 | _import_structure["modeling_van"] = [ 31 | "VAN_PRETRAINED_MODEL_ARCHIVE_LIST", 32 | "VanForImageClassification", 33 | "VanModel", 34 | "VanPreTrainedModel", 35 | ] 36 | 37 | if TYPE_CHECKING: 38 | from .configuration_van import VAN_PRETRAINED_CONFIG_ARCHIVE_MAP, VanConfig 39 | 40 | if is_torch_available(): 41 | from .modeling_van import ( 42 | VAN_PRETRAINED_MODEL_ARCHIVE_LIST, 43 | VanForImageClassification, 44 | VanModel, 45 | VanPreTrainedModel, 46 | ) 47 | 48 | else: 49 | import sys 50 | 51 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 52 | -------------------------------------------------------------------------------- /modeling/transformers/models/vilt/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2022 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_torch_available, is_vision_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_vilt": ["VILT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViltConfig"], 26 | } 27 | 28 | if is_vision_available(): 29 | _import_structure["feature_extraction_vilt"] = ["ViltFeatureExtractor"] 30 | _import_structure["processing_vilt"] = ["ViltProcessor"] 31 | 32 | if is_torch_available(): 33 | _import_structure["modeling_vilt"] = [ 34 | "VILT_PRETRAINED_MODEL_ARCHIVE_LIST", 35 | "ViltForImageAndTextRetrieval", 36 | "ViltForImagesAndTextClassification", 37 | "ViltForMaskedLM", 38 | "ViltForQuestionAnswering", 39 | "ViltLayer", 40 | "ViltModel", 41 | "ViltPreTrainedModel", 42 | ] 43 | 44 | 45 | if TYPE_CHECKING: 46 | from .configuration_vilt import VILT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViltConfig 47 | 48 | if is_vision_available(): 49 | from .feature_extraction_vilt import ViltFeatureExtractor 50 | from .processing_vilt import ViltProcessor 51 | 52 | if is_torch_available(): 53 | from .modeling_vilt import ( 54 | VILT_PRETRAINED_MODEL_ARCHIVE_LIST, 55 | ViltForImageAndTextRetrieval, 56 | ViltForImagesAndTextClassification, 57 | ViltForMaskedLM, 58 | ViltForQuestionAnswering, 59 | ViltLayer, 60 | ViltModel, 61 | ViltPreTrainedModel, 62 | ) 63 | 64 | 65 | else: 66 | import sys 67 | 68 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 69 | -------------------------------------------------------------------------------- /modeling/transformers/models/vision_encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from typing import TYPE_CHECKING 20 | 21 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_vision_encoder_decoder": ["VisionEncoderDecoderConfig"], 26 | } 27 | 28 | if is_torch_available(): 29 | _import_structure["modeling_vision_encoder_decoder"] = ["VisionEncoderDecoderModel"] 30 | 31 | if is_tf_available(): 32 | _import_structure["modeling_tf_vision_encoder_decoder"] = ["TFVisionEncoderDecoderModel"] 33 | 34 | if is_flax_available(): 35 | _import_structure["modeling_flax_vision_encoder_decoder"] = ["FlaxVisionEncoderDecoderModel"] 36 | 37 | if TYPE_CHECKING: 38 | from .configuration_vision_encoder_decoder import VisionEncoderDecoderConfig 39 | 40 | if is_torch_available(): 41 | from .modeling_vision_encoder_decoder import VisionEncoderDecoderModel 42 | 43 | if is_tf_available(): 44 | from .modeling_tf_vision_encoder_decoder import TFVisionEncoderDecoderModel 45 | 46 | if is_flax_available(): 47 | from .modeling_flax_vision_encoder_decoder import FlaxVisionEncoderDecoderModel 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 53 | -------------------------------------------------------------------------------- 
/modeling/transformers/models/vision_text_dual_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | # rely on isort to merge the imports 21 | from ...utils import _LazyModule, is_flax_available, is_torch_available 22 | 23 | 24 | _import_structure = { 25 | "configuration_vision_text_dual_encoder": ["VisionTextDualEncoderConfig"], 26 | "processing_vision_text_dual_encoder": ["VisionTextDualEncoderProcessor"], 27 | } 28 | 29 | 30 | if is_torch_available(): 31 | _import_structure["modeling_vision_text_dual_encoder"] = ["VisionTextDualEncoderModel"] 32 | 33 | 34 | if is_flax_available(): 35 | _import_structure["modeling_flax_vision_text_dual_encoder"] = ["FlaxVisionTextDualEncoderModel"] 36 | 37 | 38 | if TYPE_CHECKING: 39 | from .configuration_vision_text_dual_encoder import VisionTextDualEncoderConfig 40 | from .processing_visiotn_text_dual_encoder import VisionTextDualEncoderProcessor 41 | 42 | if is_torch_available(): 43 | from .modeling_vision_text_dual_encoder import VisionTextDualEncoderModel 44 | 45 | if is_flax_available(): 46 | from 
.modeling_vision_text_dual_encoder import FlaxVisionTextDualEncoderModel 47 | 48 | 49 | else: 50 | import sys 51 | 52 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) 53 | -------------------------------------------------------------------------------- /modeling/transformers/models/visual_bert/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_visual_bert": ["VISUAL_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "VisualBertConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_visual_bert"] = [ 29 | "VISUAL_BERT_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "VisualBertForMultipleChoice", 31 | "VisualBertForPreTraining", 32 | "VisualBertForQuestionAnswering", 33 | "VisualBertForRegionToPhraseAlignment", 34 | "VisualBertForVisualReasoning", 35 | "VisualBertLayer", 36 | "VisualBertModel", 37 | "VisualBertPreTrainedModel", 38 | ] 39 | 40 | 41 | if TYPE_CHECKING: 42 | from .configuration_visual_bert import VISUAL_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, VisualBertConfig 43 | 44 | if is_torch_available(): 45 | from .modeling_visual_bert import ( 46 | VISUAL_BERT_PRETRAINED_MODEL_ARCHIVE_LIST, 47 | VisualBertForMultipleChoice, 48 | VisualBertForPreTraining, 49 | VisualBertForQuestionAnswering, 50 | VisualBertForRegionToPhraseAlignment, 51 | VisualBertForVisualReasoning, 52 | VisualBertLayer, 53 | VisualBertModel, 54 | VisualBertPreTrainedModel, 55 | ) 56 | 57 | 58 | else: 59 | import sys 60 | 61 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 62 | -------------------------------------------------------------------------------- /modeling/transformers/models/vit_mae/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | # Copyright 2020 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | from typing import TYPE_CHECKING 19 | 20 | from ...utils import _LazyModule, is_flax_available, is_tf_available, is_torch_available 21 | 22 | 23 | _import_structure = { 24 | "configuration_vit_mae": ["VIT_MAE_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTMAEConfig"], 25 | } 26 | 27 | if is_torch_available(): 28 | _import_structure["modeling_vit_mae"] = [ 29 | "VIT_MAE_PRETRAINED_MODEL_ARCHIVE_LIST", 30 | "ViTMAEForPreTraining", 31 | "ViTMAELayer", 32 | "ViTMAEModel", 33 | "ViTMAEPreTrainedModel", 34 | ] 35 | 36 | if is_tf_available(): 37 | _import_structure["modeling_tf_vit_mae"] = [ 38 | "TFViTMAEForPreTraining", 39 | "TFViTMAEModel", 40 | "TFViTMAEPreTrainedModel", 41 | ] 42 | 43 | if TYPE_CHECKING: 44 | from .configuration_vit_mae import VIT_MAE_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTMAEConfig 45 | 46 | if is_torch_available(): 47 | from .modeling_vit_mae import ( 48 | VIT_MAE_PRETRAINED_MODEL_ARCHIVE_LIST, 49 | ViTMAEForPreTraining, 50 | ViTMAELayer, 51 | ViTMAEModel, 52 | ViTMAEPreTrainedModel, 53 | ) 54 | 55 | if is_tf_available(): 56 | from .modeling_tf_vit_mae import TFViTMAEForPreTraining, TFViTMAEModel, TFViTMAEPreTrainedModel 57 | 58 | 59 | else: 60 | import sys 61 | 62 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 63 | -------------------------------------------------------------------------------- /modeling/transformers/models/wav2vec2_phoneme/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule


# Maps submodule name -> public names it exports; consumed by _LazyModule so
# each submodule is only imported on first attribute access.
# fmt: off
_import_structure = {
    "tokenization_wav2vec2_phoneme": ["Wav2Vec2PhonemeCTCTokenizer"]
}
# fmt: on


if TYPE_CHECKING:
    # Real import for static type checkers only; this branch is skipped at runtime.
    from .tokenization_wav2vec2_phoneme import Wav2Vec2PhonemeCTCTokenizer
else:
    import sys

    # Replace this module object with a lazy proxy that defers the actual imports.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule


# Submodule name -> exported names; consumed by _LazyModule for deferred imports.
# fmt: off
_import_structure = {
    "processing_wav2vec2_with_lm": ["Wav2Vec2ProcessorWithLM"]
}
# fmt: on


if TYPE_CHECKING:
    # Real import for static type checkers only; skipped at runtime.
    from .processing_wav2vec2_with_lm import Wav2Vec2ProcessorWithLM
else:
    import sys

    # Replace this module with a lazy proxy that imports on first attribute access.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_torch_available


# Submodule name -> names it exports. _LazyModule uses this to import each
# submodule only when one of its attributes is first accessed.
_import_structure = {
    "configuration_wavlm": ["WAVLM_PRETRAINED_CONFIG_ARCHIVE_MAP", "WavLMConfig"],
}

# The modeling submodule requires torch, so only advertise it when torch is installed.
if is_torch_available():
    _import_structure["modeling_wavlm"] = [
        "WAVLM_PRETRAINED_MODEL_ARCHIVE_LIST",
        "WavLMForAudioFrameClassification",
        "WavLMForCTC",
        "WavLMForSequenceClassification",
        "WavLMForXVector",
        "WavLMModel",
        "WavLMPreTrainedModel",
    ]

if TYPE_CHECKING:
    # Real imports for static type checkers only; skipped at runtime.
    from .configuration_wavlm import WAVLM_PRETRAINED_CONFIG_ARCHIVE_MAP, WavLMConfig

    if is_torch_available():
        from .modeling_wavlm import (
            WAVLM_PRETRAINED_MODEL_ARCHIVE_LIST,
            WavLMForAudioFrameClassification,
            WavLMForCTC,
            WavLMForSequenceClassification,
            WavLMForXVector,
            WavLMModel,
            WavLMPreTrainedModel,
        )

else:
    import sys

    # Replace this module with a lazy proxy that defers the submodule imports.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_sentencepiece_available, is_torch_available


# Submodule name -> exported names, used by _LazyModule for on-demand imports.
_import_structure = {
    "configuration_xlm_prophetnet": [
        "XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP",
        "XLMProphetNetConfig",
    ],
}

# The tokenizer requires sentencepiece; only expose it when available.
if is_sentencepiece_available():
    _import_structure["tokenization_xlm_prophetnet"] = ["XLMProphetNetTokenizer"]

# Modeling code requires torch.
if is_torch_available():
    _import_structure["modeling_xlm_prophetnet"] = [
        "XLM_PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST",
        "XLMProphetNetDecoder",
        "XLMProphetNetEncoder",
        "XLMProphetNetForCausalLM",
        "XLMProphetNetForConditionalGeneration",
        "XLMProphetNetModel",
    ]


if TYPE_CHECKING:
    # Static-analysis-only imports; never executed at runtime.
    from .configuration_xlm_prophetnet import XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMProphetNetConfig

    if is_sentencepiece_available():
        from .tokenization_xlm_prophetnet import XLMProphetNetTokenizer

    if is_torch_available():
        from .modeling_xlm_prophetnet import (
            XLM_PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST,
            XLMProphetNetDecoder,
            XLMProphetNetEncoder,
            XLMProphetNetForCausalLM,
            XLMProphetNetForConditionalGeneration,
            XLMProphetNetModel,
        )

else:
    import sys

    # Swap this module for a lazy proxy that imports submodules on first access.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# coding=utf-8
# Copyright 2020 The Microsoft Authors and The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" XLM-ProphetNet model configuration"""


from ...utils import logging
from ..prophetnet.configuration_prophetnet import ProphetNetConfig


logger = logging.get_logger(__name__)

# Pretrained checkpoint short name -> URL of its hosted config.json.
XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json",
}


class XLMProphetNetConfig(ProphetNetConfig):
    """
    This class overrides [`ProphetNetConfig`]. Please check the superclass for the appropriate documentation alongside
    usage examples.
    """

    # Identifier that routes "xlm-prophetnet" checkpoints to this config class.
    model_type = "xlm-prophetnet"
from typing import TYPE_CHECKING

from ...utils import _LazyModule, is_torch_available


# Submodule name -> exported names; _LazyModule imports each submodule only on
# first attribute access.
_import_structure = {
    "configuration_xlm_roberta_xl": [
        "XLM_ROBERTA_XL_PRETRAINED_CONFIG_ARCHIVE_MAP",
        "XLMRobertaXLConfig",
        "XLMRobertaXLOnnxConfig",
    ],
}

# Modeling code requires torch, so only advertise it when torch is installed.
if is_torch_available():
    _import_structure["modeling_xlm_roberta_xl"] = [
        "XLM_ROBERTA_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
        "XLMRobertaXLForCausalLM",
        "XLMRobertaXLForMaskedLM",
        "XLMRobertaXLForMultipleChoice",
        "XLMRobertaXLForQuestionAnswering",
        "XLMRobertaXLForSequenceClassification",
        "XLMRobertaXLForTokenClassification",
        "XLMRobertaXLModel",
        "XLMRobertaXLPreTrainedModel",
    ]

if TYPE_CHECKING:
    # Real imports for static type checkers only; skipped at runtime.
    from .configuration_xlm_roberta_xl import (
        XLM_ROBERTA_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
        XLMRobertaXLConfig,
        XLMRobertaXLOnnxConfig,
    )

    if is_torch_available():
        from .modeling_xlm_roberta_xl import (
            XLM_ROBERTA_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
            XLMRobertaXLForCausalLM,
            XLMRobertaXLForMaskedLM,
            XLMRobertaXLForMultipleChoice,
            XLMRobertaXLForQuestionAnswering,
            XLMRobertaXLForSequenceClassification,
            XLMRobertaXLForTokenClassification,
            XLMRobertaXLModel,
            XLMRobertaXLPreTrainedModel,
        )

else:
    import sys

    # Fix: forward module_spec=__spec__ like every other model __init__ in this
    # package (vit_mae, wavlm, xlm_prophetnet, onnx, ...) so the lazy proxy
    # keeps this module's import spec intact.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
from typing import TYPE_CHECKING

# rely on isort to merge the imports
from ...utils import _LazyModule, is_tokenizers_available, is_torch_available


# Submodule name -> exported names; _LazyModule imports each submodule only on
# first attribute access.
_import_structure = {
    "configuration_yoso": ["YOSO_PRETRAINED_CONFIG_ARCHIVE_MAP", "YosoConfig"],
}

# Modeling code requires torch, so only advertise it when torch is installed.
if is_torch_available():
    _import_structure["modeling_yoso"] = [
        "YOSO_PRETRAINED_MODEL_ARCHIVE_LIST",
        "YosoForMaskedLM",
        "YosoForMultipleChoice",
        "YosoForQuestionAnswering",
        "YosoForSequenceClassification",
        "YosoForTokenClassification",
        "YosoLayer",
        "YosoModel",
        "YosoPreTrainedModel",
    ]


if TYPE_CHECKING:
    # Real imports for static type checkers only; skipped at runtime.
    from .configuration_yoso import YOSO_PRETRAINED_CONFIG_ARCHIVE_MAP, YosoConfig

    if is_torch_available():
        from .modeling_yoso import (
            YOSO_PRETRAINED_MODEL_ARCHIVE_LIST,
            YosoForMaskedLM,
            YosoForMultipleChoice,
            YosoForQuestionAnswering,
            YosoForSequenceClassification,
            YosoForTokenClassification,
            YosoLayer,
            YosoModel,
            YosoPreTrainedModel,
        )


else:
    import sys

    # Fix: forward module_spec=__spec__ like every other model __init__ in this
    # package (vit_mae, wavlm, xlm_prophetnet, onnx, ...) so the lazy proxy
    # keeps this module's import spec intact.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ..utils import _LazyModule


# Submodule name -> public names; _LazyModule imports each submodule on first use.
_import_structure = {
    "config": [
        "EXTERNAL_DATA_FORMAT_SIZE_LIMIT",
        "OnnxConfig",
        "OnnxConfigWithPast",
        "OnnxSeq2SeqConfigWithPast",
        "PatchingSpec",
    ],
    "convert": ["export", "validate_model_outputs"],
    "features": ["FeaturesManager"],
    "utils": ["ParameterFormat", "compute_serialized_parameters_size"],
}


if TYPE_CHECKING:
    # Real imports for static type checkers only; skipped at runtime.
    from .config import (
        EXTERNAL_DATA_FORMAT_SIZE_LIMIT,
        OnnxConfig,
        OnnxConfigWithPast,
        OnnxSeq2SeqConfigWithPast,
        PatchingSpec,
    )
    from .convert import export, validate_model_outputs
    from .features import FeaturesManager
    from .utils import ParameterFormat, compute_serialized_parameters_size

else:
    import sys

    # Replace this module with a lazy proxy that defers the submodule imports.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
from ctypes import c_float, sizeof
from enum import Enum


class ParameterFormat(Enum):
    """Storage format used when serializing model parameters."""

    Float = c_float

    @property
    def size(self) -> int:
        """
        Number of bytes required for this data type.

        Returns:
            Integer > 0
        """
        return sizeof(self.value)


def compute_effective_axis_dimension(dimension: int, fixed_dimension: int, num_token_to_add: int = 0) -> int:
    """
    Compute the effective size of an axis, substituting `fixed_dimension` for dynamic axes.

    Args:
        dimension: Declared size of the axis; a value <= 0 indicates a dynamic axis.
        fixed_dimension: Fallback size used when the axis is dynamic.
        num_token_to_add: Number of tokens already accounted for elsewhere, subtracted
            from the result.

    Returns:
        The effective dimension to use for this axis.
    """
    # <= 0 is possible if using a dynamic axis (the original comment said "< 0",
    # but the condition below also treats 0 as dynamic).
    if dimension <= 0:
        dimension = fixed_dimension

    dimension -= num_token_to_add
    return dimension


def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterFormat) -> int:
    """
    Compute the size taken by all the parameters in the given storage format when serializing the model.

    Args:
        num_parameters: Number of parameters to be saved.
        dtype: The data format each parameter will be saved in.

    Returns:
        Size (in bytes) taken to save all the parameters.
    """
    return num_parameters * dtype.size
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from packaging import version
from torch import _softmax_backward_data

from .utils import logging


logger = logging.get_logger(__name__)

# Version flags computed once at import time; used below to select between
# torch APIs whose signatures changed across releases.
is_torch_less_than_1_8 = version.parse(torch.__version__) < version.parse("1.8.0")
is_torch_less_than_1_11 = version.parse(torch.__version__) < version.parse("1.11")


def torch_int_div(tensor1, tensor2):
    """
    A function that performs integer division across different versions of PyTorch.
    """
    # torch.div with rounding_mode="floor" is only available from torch 1.8 on;
    # older versions fall back to the `//` operator.
    if is_torch_less_than_1_8:
        return tensor1 // tensor2
    else:
        return torch.div(tensor1, tensor2, rounding_mode="floor")


def softmax_backward_data(parent, grad_output, output, dim, self):
    """
    A function that calls the internal `_softmax_backward_data` PyTorch method and that adjusts the arguments according
    to the torch version detected.
    """

    # NOTE(review): for torch >= 1.11 the last argument is the input's dtype
    # rather than the tensor itself — presumably the internal signature changed
    # in 1.11; confirm against the torch release notes. Also note the `dim`
    # parameter is unused here: `parent.dim` is passed instead.
    if is_torch_less_than_1_11:
        return _softmax_backward_data(grad_output, output, parent.dim, self)
    else:
        return _softmax_backward_data(grad_output, output, parent.dim, self.dtype)
4 | 5 | # Copyright 2021 The HuggingFace Team. All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | from .trainer_sm import SageMakerTrainer 20 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 21 | -------------------------------------------------------------------------------- /modeling/transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import warnings

from ..trainer import Trainer
from ..utils import logging


logger = logging.get_logger(__name__)


class SageMakerTrainer(Trainer):
    """
    Deprecated alias for [`Trainer`], kept for backward compatibility with code
    written against the SageMaker-specific trainer. Emits a `FutureWarning` on
    construction and otherwise defers everything to the superclass.
    """

    def __init__(self, args=None, **kwargs):
        # Warn callers on every instantiation that this alias goes away in v5.
        warnings.warn(
            "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. You can use `Trainer` "
            "instead.",
            FutureWarning,
        )
        super().__init__(args=args, **kwargs)
37 | 38 | Returns: 39 | `List[int]`: The shape of the tensor as a list. 40 | """ 41 | if isinstance(tensor, np.ndarray): 42 | return list(tensor.shape) 43 | 44 | dynamic = tf.shape(tensor) 45 | 46 | if tensor.shape == tf.TensorShape(None): 47 | return dynamic 48 | 49 | static = tensor.shape.as_list() 50 | 51 | return [dynamic[i] if s is None else s for i, s in enumerate(static)] 52 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_detectron2_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import requires_backends 3 | 4 | 5 | LAYOUTLM_V2_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class LayoutLMv2Model: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["detectron2"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["detectron2"]) 15 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_scatter_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST = None 7 | 8 | 9 | class TapasForMaskedLM(metaclass=DummyObject): 10 | _backends = ["scatter"] 11 | 12 | def __init__(self, *args, **kwargs): 13 | requires_backends(self, ["scatter"]) 14 | 15 | 16 | class TapasForQuestionAnswering(metaclass=DummyObject): 17 | _backends = ["scatter"] 18 | 19 | def __init__(self, *args, **kwargs): 20 | requires_backends(self, ["scatter"]) 21 | 22 | 23 | class TapasForSequenceClassification(metaclass=DummyObject): 24 | _backends = ["scatter"] 25 | 26 | def __init__(self, *args, **kwargs): 27 | requires_backends(self, ["scatter"]) 28 | 29 | 30 | class TapasModel(metaclass=DummyObject): 31 | _backends = ["scatter"] 32 | 33 | def __init__(self, *args, **kwargs): 34 | requires_backends(self, ["scatter"]) 35 | 36 | 37 | class TapasPreTrainedModel(metaclass=DummyObject): 38 | _backends = ["scatter"] 39 | 40 | def __init__(self, *args, **kwargs): 41 | requires_backends(self, ["scatter"]) 42 | 43 | 44 | def load_tf_weights_in_tapas(*args, **kwargs): 45 | requires_backends(load_tf_weights_in_tapas, ["scatter"]) 46 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_sentencepiece_and_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | class Speech2TextProcessor(metaclass=DummyObject): 7 | _backends = ["sentencepiece", "speech"] 8 | 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["sentencepiece", "speech"]) 11 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | SLOW_TO_FAST_CONVERTERS = None 7 | 8 | 9 | def convert_slow_tokenizer(*args, **kwargs): 10 | requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"]) 11 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_speech_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | class Speech2TextFeatureExtractor(metaclass=DummyObject): 7 | _backends = ["speech"] 8 | 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["speech"]) 11 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_timm_and_vision_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 7 | 8 | 9 | class DetrForObjectDetection(metaclass=DummyObject): 10 | _backends = ["timm", "vision"] 11 | 12 | def __init__(self, *args, **kwargs): 13 | requires_backends(self, ["timm", "vision"]) 14 | 15 | 16 | class DetrForSegmentation(metaclass=DummyObject): 17 | _backends = ["timm", "vision"] 18 | 19 | def __init__(self, *args, **kwargs): 20 | requires_backends(self, ["timm", "vision"]) 21 | 22 | 23 | class DetrModel(metaclass=DummyObject): 24 | _backends = ["timm", "vision"] 25 | 26 | def __init__(self, *args, **kwargs): 27 | requires_backends(self, ["timm", "vision"]) 28 | 29 | 30 | class DetrPreTrainedModel(metaclass=DummyObject): 31 | _backends = ["timm", "vision"] 32 | 33 | def __init__(self, *args, **kwargs): 34 | requires_backends(self, ["timm", "vision"]) 35 | -------------------------------------------------------------------------------- /modeling/transformers/utils/dummy_timm_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import requires_backends 3 | 4 | 5 | DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None 6 | 7 | 8 | class DetrForObjectDetection: 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["timm"]) 11 | 12 | @classmethod 13 | def from_pretrained(cls, *args, **kwargs): 14 | requires_backends(cls, ["timm"]) 15 | 16 | 17 | class DetrForSegmentation: 18 | def __init__(self, *args, **kwargs): 19 | requires_backends(self, ["timm"]) 20 | 21 | @classmethod 22 | def from_pretrained(cls, *args, **kwargs): 23 | requires_backends(cls, ["timm"]) 24 | 25 | 26 | class DetrModel: 27 | def __init__(self, *args, **kwargs): 28 | requires_backends(self, ["timm"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["timm"]) 33 | -------------------------------------------------------------------------------- /modeling/transformers/utils/model_parallel_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
from collections import Counter
from math import ceil


def assert_device_map(device_map, num_blocks):
    """
    Validate that `device_map` assigns each of the model's attention blocks to exactly one device.

    Args:
        device_map: Mapping of device -> list of attention-block indices assigned to it.
        num_blocks: Total number of attention blocks the model has.

    Raises:
        ValueError: If any block is assigned to more than one device, is missing from the
            map, or does not exist in the model.
    """
    blocks = list(range(0, num_blocks))

    device_map_blocks = [item for sublist in list(device_map.values()) for item in sublist]

    # Duplicate check. Counter preserves first-occurrence order, so the error
    # message lists duplicates in the same order as the original linear scan,
    # but in O(n) instead of O(n^2) repeated list.count() calls.
    duplicate_blocks = [block for block, count in Counter(device_map_blocks).items() if count > 1]

    # Use sets for membership so the missing/extra scans are O(n), not O(n^2).
    assigned = set(device_map_blocks)
    valid = set(blocks)
    # Missing blocks
    missing_blocks = [i for i in blocks if i not in assigned]
    extra_blocks = [i for i in device_map_blocks if i not in valid]

    if len(duplicate_blocks) != 0:
        raise ValueError(
            "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device. These "
            "attention blocks were specified more than once: " + str(duplicate_blocks)
        )
    if len(missing_blocks) != 0:
        raise ValueError(
            "There are attention blocks for this model that are not specified in the device_map. Add these attention "
            "blocks to a device on the device_map: " + str(missing_blocks)
        )
    if len(extra_blocks) != 0:
        raise ValueError(
            "The device_map contains more attention blocks than this model has. Remove these from the device_map:"
            + str(extra_blocks)
        )


def get_device_map(n_layers, devices):
    """Returns a dictionary of layers distributed evenly across all devices."""
    layers = list(range(n_layers))
    # Each device gets ceil(n_layers / n_devices) consecutive layers; the last
    # device may receive fewer when the division is uneven.
    n_blocks = int(ceil(n_layers / len(devices)))
    layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)]

    return dict(zip(devices, layers_list))