├── tests ├── __init__.py ├── fixtures │ ├── empty.txt │ ├── dummy-config.json │ ├── input.txt │ ├── tests_samples │ │ ├── .gitignore │ │ ├── GermEval │ │ │ └── labels.txt │ │ ├── STS-B │ │ │ ├── train.tsv │ │ │ └── dev.tsv │ │ └── MRPC │ │ │ ├── dev.tsv │ │ │ └── train.tsv │ ├── spiece.model │ ├── test_sentencepiece.model │ └── hub-index.sample.json ├── test_adapter_saving.py ├── test_activations.py ├── test_adapter_fusion_saving.py ├── test_adapter_fusion_config.py ├── test_adapter_config.py ├── test_tokenization_utils.py └── test_tokenization_distilbert.py ├── MANIFEST.in ├── examples ├── summarization │ ├── __init__.py │ ├── t5 │ │ ├── __init__.py │ │ ├── download_cnn_daily_mail.py │ │ ├── README.md │ │ └── test_t5_examples.py │ ├── bart │ │ ├── __init__.py │ │ ├── run_train.sh │ │ └── run_train_tiny.sh │ └── bertabs │ │ ├── __init__.py │ │ └── requirements.txt ├── translation │ └── t5 │ │ ├── __init__.py │ │ └── test_t5_examples.py ├── ner │ └── .gitignore ├── distillation │ ├── requirements.txt │ └── training_configs │ │ ├── distilgpt2.json │ │ ├── distilbert-base-cased.json │ │ ├── distilbert-base-uncased.json │ │ ├── distilbert-base-multilingual-cased.json │ │ └── distilroberta-base.json ├── text-generation │ ├── pplm │ │ ├── imgs │ │ │ ├── wooly.png │ │ │ └── headfigure.png │ │ └── pplm_classification_head.py │ └── README.md ├── movement-pruning │ ├── emmental │ │ ├── modules │ │ │ └── __init__.py │ │ └── __init__.py │ └── requirements.txt ├── requirements.txt ├── contrib │ ├── README.md │ └── mm-imdb │ │ └── README.md ├── token-classification │ ├── test_ner_examples.py │ ├── run.sh │ └── run_pl.sh ├── text-classification │ └── run_pl.sh ├── benchmarking │ └── run_benchmark.py ├── adversarial │ └── README.md └── multiple-choice │ └── README.md ├── docs ├── source │ ├── examples.md │ ├── notebooks.md │ ├── favicon.ico │ ├── _static │ │ └── css │ │ │ ├── Calibre-Thin.otf │ │ │ ├── Calibre-Light.ttf │ │ │ ├── Calibre-Medium.otf │ │ │ ├── Calibre-Regular.otf │ │ │ └── code-snippets.css │ ├── imgs │ │ ├── transformers_logo_name.png │ │ ├── warmup_cosine_schedule.png │ │ ├── warmup_linear_schedule.png │ │ ├── warmup_constant_schedule.png │ │ ├── warmup_cosine_hard_restarts_schedule.png │ │ └── warmup_cosine_warm_restarts_schedule.png │ ├── main_classes │ │ ├── configuration.rst │ │ └── model.rst │ ├── model_doc │ │ ├── encoderdecoder.rst │ │ └── auto.rst │ └── bertology.rst └── Makefile ├── model_cards ├── bert-base-chinese-README.md ├── bert-large-cased-README.md ├── bart-large-cnn │ └── README.md ├── bart-large-xsum │ └── README.md ├── bert-base-german-dbmdz-cased-README.md ├── bert-base-german-dbmdz-uncased-README.md ├── google │ ├── bert_uncased_L-10_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-10_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-12_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-12_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-12_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-2_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-4_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-4_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-4_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-4_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-6_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-6_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-6_H-512_A-8 │ │ └── 
README.md │ ├── bert_uncased_L-6_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-8_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-8_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-8_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-8_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-10_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── reformer-crime-and-punishment │ │ └── README.md │ ├── electra-large-generator │ │ └── README.md │ ├── electra-small-generator │ │ └── README.md │ └── electra-base-generator │ │ └── README.md ├── facebook │ └── bart-large-cnn │ │ └── README.md ├── bert-base-multilingual-cased-README.md ├── distilbert-base-multilingual-cased-README.md ├── bert-base-multilingual-uncased-README.md ├── t5-11b-README.md ├── t5-3b-README.md ├── t5-base-README.md ├── t5-large-README.md ├── t5-small-README.md ├── severinsimmler │ └── literary-german-bert │ │ ├── kfold.png │ │ └── prosa-jahre.png ├── deepset │ ├── sentence_bert │ │ └── README.md │ └── bert-base-german-cased-oldvocab │ │ └── README.md ├── djstrong │ └── bg_cs_pl_ru_cased_L-12_H-768_A-12 │ │ └── README.md ├── gpt2-README.md ├── distilgpt2-README.md ├── roberta-base-README.md ├── xlm-roberta-base-README.md ├── albert-base-v1-README.md ├── albert-xxlarge-v2-README.md ├── bert-base-cased-README.md ├── bert-base-uncased-README.md ├── distilroberta-base-README.md ├── xlm-mlm-en-2048-README.md ├── distilbert-base-uncased-README.md ├── binwang │ └── xlnet-base-cased │ │ └── README.md ├── daigo │ └── bert-base-japanese-sentiment │ │ └── README.md ├── lvwerra │ ├── gpt2-medium-taboo │ │ └── README.md │ ├── bert-imdb │ │ └── README.md │ ├── gpt2-imdb │ │ └── README.md │ ├── gpt2-imdb-pos │ │ └── README.md │ └── gpt2-imdb-ctrl │ │ └── README.md ├── lysandre │ ├── arxiv │ │ └── README.md │ └── arxiv-nlp │ │ └── README.md ├── Hate-speech-CNERG │ ├── dehatebert-mono-arabic │ │ └── README.md │ └── dehatebert-mono-english │ │ └── README.md ├── jannesg │ └── bertsson │ │ └── README.md ├── DeepPavlov │ ├── rubert-base-cased │ │ └── README.md │ ├── bert-base-bg-cs-pl-ru-cased │ │ └── README.md │ ├── rubert-base-cased-conversational │ │ └── README.md │ ├── rubert-base-cased-sentence │ │ └── README.md │ ├── bert-base-multilingual-cased-sentence │ │ └── README.md │ └── bert-base-cased-conversational │ │ └── README.md ├── julien-c │ ├── bert-xsmall-dummy │ │ └── README.md │ ├── EsperBERTo-small-pos │ │ └── README.md │ ├── dummy-unknown │ │ └── README.md │ └── EsperBERTo-small │ │ └── README.md ├── spentaur │ └── yelp │ │ └── README.md ├── allenai │ ├── longformer-base-4096-extra.pos.embd.only │ │ └── README.md │ ├── scibert_scivocab_cased │ │ └── README.md │ ├── scibert_scivocab_uncased │ │ └── README.md │ ├── longformer-base-4096 │ │ └── README.md │ └── biomed_roberta_base │ │ └── README.md ├── codegram │ └── calbert-base-uncased │ │ └── README.md ├── clue │ ├── xlnet_chinese_large │ │ └── README.md │ ├── roberta_chinese_base │ │ └── README.md │ ├── roberta_chinese_large │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ └── albert_chinese_small │ │ └── README.md ├── ViktorAlm │ └── electra-base-norwegian-uncased-discriminator │ │ └── README.md ├── canwenxu │ └── BERT-of-Theseus-MNLI │ │ └── README.md ├── surajp │ └── albert-base-sanskrit │ │ └── README.md ├── wptoux │ └── albert-chinese-large-qa │ │ └── README.md ├── illuin │ ├── camembert-base-fquad │ │ └── README.md │ └── camembert-large-fquad │ │ └── README.md ├── jplu │ ├── tf-camembert-base │ │ └── README.md │ ├── 
tf-xlm-roberta-base │ │ └── README.md │ └── tf-xlm-roberta-large │ │ └── README.md ├── twmkn9 │ ├── albert-base-v2-squad2 │ │ └── README.md │ ├── bert-base-uncased-squad2 │ │ └── README.md │ ├── distilroberta-base-squad2 │ │ └── README.md │ └── distilbert-base-uncased-squad2 │ │ └── README.md ├── digitalepidemiologylab │ └── covid-twitter-bert │ │ └── README.md ├── fmikaelian │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-base-squad │ │ └── README.md │ └── flaubert-base-uncased-squad │ │ └── README.md ├── activebus │ ├── BERT-DK_rest │ │ └── README.md │ ├── BERT-PT_rest │ │ └── README.md │ ├── BERT-PT_laptop │ │ └── README.md │ └── BERT-DK_laptop │ │ └── README.md ├── monologg │ ├── koelectra-base-generator │ │ └── README.md │ ├── koelectra-small-generator │ │ └── README.md │ ├── koelectra-base-discriminator │ │ └── README.md │ └── koelectra-small-discriminator │ │ └── README.md ├── ixa-ehu │ └── berteus-base-cased │ │ └── README.md ├── ahotrod │ └── roberta_large_squad2 │ │ └── README.md ├── valhalla │ └── t5-base-squad │ │ └── README.md ├── Tereveni-AI │ └── gpt2-124M-uk-fiction │ │ └── README.md ├── voidful │ ├── albert_chinese_base │ │ └── README.md │ ├── albert_chinese_large │ │ └── README.md │ ├── albert_chinese_xlarge │ │ └── README.md │ ├── albert_chinese_xxlarge │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ └── albert_chinese_small │ │ └── README.md ├── gaochangkuan │ └── model_dir │ │ └── README.md ├── allegro │ └── herbert-klej-cased-tokenizer-v1 │ │ └── README.md └── nlptown │ └── bert-base-multilingual-uncased-sentiment │ └── README.md ├── adapter_docs ├── logo.png ├── favicon.png ├── adapter_types.md ├── classes │ ├── adapter_modules.rst │ ├── adapter_config.rst │ ├── bert_mixins.rst │ ├── adapter_utils.rst │ ├── model_mixins.rst │ ├── weights_loaders.rst │ └── roberta.rst ├── _static │ └── custom.css ├── Makefile ├── README.md ├── installation.md └── make.bat ├── .coveragerc ├── src └── transformers │ ├── data │ ├── datasets │ │ └── __init__.py │ ├── processors │ │ └── __init__.py │ └── __init__.py │ ├── commands │ ├── __init__.py │ ├── transformers_cli.py │ └── download.py │ ├── benchmark │ └── __init__.py │ ├── trainer_utils.py │ ├── configuration_marian.py │ ├── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── configuration_camembert.py │ ├── configuration_mmbt.py │ └── activations.py ├── templates └── adding_a_new_example_script │ └── README.md ├── .github ├── ISSUE_TEMPLATE │ ├── new-adapter-setup.md │ ├── feature-request.md │ └── bug-report.md └── workflows │ ├── adapter_docs_build.yml │ └── tests_torch.yml ├── deploy_multi_version_doc.sh ├── docker ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-tensorflow-cpu │ └── Dockerfile ├── transformers-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-tensorflow-gpu │ └── Dockerfile └── transformers-gpu │ └── Dockerfile ├── setup.cfg ├── .circleci └── deploy.sh └── Makefile /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | 
-------------------------------------------------------------------------------- /examples/summarization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/translation/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/bart/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/bertabs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /model_cards/bert-base-chinese-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/bert-large-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: apache-2.0 3 | --- 4 | -------------------------------------------------------------------------------- /examples/ner/.gitignore: -------------------------------------------------------------------------------- 1 | *.tmp 2 | cached_* 3 | *.txt 4 | preprocess.* 5 | *.ps1 6 | -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /model_cards/bart-large-cnn/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | --- 5 | 6 | -------------------------------------------------------------------------------- /model_cards/bart-large-xsum/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | --- 5 | 6 | -------------------------------------------------------------------------------- /examples/summarization/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /model_cards/bert-base-german-dbmdz-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | license: mit 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/bert-base-german-dbmdz-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | license: mit 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | 
../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-512_A-8/README.md: 
-------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/facebook/bart-large-cnn/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | 5 | license: mit 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.txt 5 | !*.tsv 6 | !*.json 7 | !.gitignore -------------------------------------------------------------------------------- /adapter_docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/logo.png -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/favicon.ico -------------------------------------------------------------------------------- /adapter_docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/favicon.png -------------------------------------------------------------------------------- /model_cards/bert-base-multilingual-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | 4 | license: apache-2.0 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-multilingual-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | license: apache-2.0 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/bert-base-multilingual-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | 4 | license: apache-2.0 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/t5-11b-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | 
--- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-3b-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-large-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-small-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /adapter_docs/adapter_types.md: -------------------------------------------------------------------------------- 1 | # Adapter Types 2 | 3 | TODO write something about different adapter types and configurations. 4 | -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /examples/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy==1.3.1 8 | -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/kfold.png -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, 
ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/prosa-jahre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png -------------------------------------------------------------------------------- /model_cards/deepset/sentence_bert/README.md: -------------------------------------------------------------------------------- 1 | This is an upload of the bert-base-nli-stsb-mean-tokens pretrained model from the Sentence Transformers Repo (https://github.com/UKPLab/sentence-transformers) 2 | -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_modules.rst: -------------------------------------------------------------------------------- 1 | Adapter Modules 2 | =============== 3 | 4 | Classes implementing task and language adapters. 5 | 6 | .. automodule:: transformers.adapter_modeling 7 | :members: 8 | -------------------------------------------------------------------------------- /model_cards/djstrong/bg_cs_pl_ru_cased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | Slavic BERT from https://github.com/deepmipt/Slavic-BERT-NER http://files.deeppavlov.ai/deeppavlov_data/bg_cs_pl_ru_cased_L-12_H-768_A-12.tar.gz 2 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==0.7.3 # April 10, 2020 release 9 | matplotlib 10 | -------------------------------------------------------------------------------- /examples/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /model_cards/gpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_config.rst: -------------------------------------------------------------------------------- 1 | Model Adapters Config 2 | ======================= 3 | 4 | This class manages the setup and configuration of adapter modules in a pre-trained model. 5 | 6 | .. autoclass:: transformers.ModelAdaptersConfig 7 | :members: 8 | -------------------------------------------------------------------------------- /adapter_docs/classes/bert_mixins.rst: -------------------------------------------------------------------------------- 1 | BERT Mixins 2 | ==================== 3 | 4 | These classes added to the BERT module classes add support for adapters to all BERT-based transformer models. 5 | 6 | .. 
automodule:: transformers.adapter_bert 7 | :members: 8 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- /model_cards/distilgpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_utils.rst: -------------------------------------------------------------------------------- 1 | Adapter Utilities 2 | ==================== 3 | 4 | A collection of utility methods mainly related to searching and loading adapter modules from 5 | Adapter-Hub. 6 | 7 | ..
automodule:: transformers.adapter_utils 8 | :members: 9 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-base-v1-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-xxlarge-v2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /model_cards/bert-base-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/bert-base-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/distilroberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-mlm-en-2048-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: cc-by-nc-4.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/binwang/xlnet-base-cased/README.md: -------------------------------------------------------------------------------- 1 | This is a pre-trained **XLNet** model with 12 layers. 2 | 3 | It accompanies the paper SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models 4 | 5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding) 6 | -------------------------------------------------------------------------------- /examples/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Community contributed examples 2 | 3 | This folder contains examples which are not actively maintained (mostly contributed by the community). 4 | 5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working.
6 | -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import GlueDataset, GlueDataTrainingArguments 6 | from .language_modeling import LineByLineTextDataset, TextDataset 7 | -------------------------------------------------------------------------------- /model_cards/daigo/bert-base-japanese-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - japanese 4 | --- 5 | 6 | binary classification 7 | 8 | # Usage 9 | ``` 10 | print(pipeline("sentiment-analysis",model="daigo/bert-base-japanese-sentiment",tokenizer="daigo/bert-base-japanese-sentiment")("私は幸福である。")) 11 | 12 | [{'label': 'ポジティブ', 'score': 0.98430425}] 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-medium-taboo/README.md: -------------------------------------------------------------------------------- 1 | # GPT-2 (medium) Taboo 2 | 3 | ## What is it? 4 | A fine-tuned GPT-2 version for Taboo cards generation. 
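A minimal usage sketch for this checkpoint (the model name is taken from this card's path; the prompt and sampling settings below are illustrative assumptions, not values from the card):

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned checkpoint and its tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-medium-taboo")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-medium-taboo")

# Prompt the model with the beginning of a Taboo-style card and sample a completion
input_ids = tokenizer.encode("Describe the word", return_tensors="pt")
output = model.generate(input_ids, max_length=48, do_sample=True, top_k=50, top_p=0.95)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```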
5 | 6 | ## Training setting 7 | 8 | The model was trained on ~900 Taboo cards in the following format for 100 epochs: 9 | ``` 10 | Describe the word Glitch without using the words Problem, Unexpected, Technology, Minor, Outage. 11 | ``` 12 | 13 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from argparse import ArgumentParser 3 | 4 | 5 | class BaseTransformersCLICommand(ABC): 6 | @staticmethod 7 | @abstractmethod 8 | def register_subcommand(parser: ArgumentParser): 9 | raise NotImplementedError() 10 | 11 | @abstractmethod 12 | def run(self): 13 | raise NotImplementedError() 14 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /model_cards/lysandre/arxiv/README.md: -------------------------------------------------------------------------------- 1 | # ArXiv GPT-2 checkpoint 2 | 3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from physics fields. 4 | 5 | ## Training data 6 | 7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields. 8 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from ..file_utils import is_torch_available 6 | 7 | 8 | if is_torch_available(): 9 | from .benchmark_args import PyTorchBenchmarkArguments 10 | from .benchmark import PyTorchBenchmark 11 | -------------------------------------------------------------------------------- /model_cards/lysandre/arxiv-nlp/README.md: -------------------------------------------------------------------------------- 1 | # ArXiv-NLP GPT-2 checkpoint 2 | 3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from the computational linguistics field.
4 | 5 | ## Training data 6 | 7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field. -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-arabic/README.md: -------------------------------------------------------------------------------- 1 | This model is used for detecting **hate speech** in the **Arabic language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is fine-tuned from the multilingual BERT model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.8674776 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-english/README.md: -------------------------------------------------------------------------------- 1 | This model is used for detecting **hate speech** in the **English language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is fine-tuned from the multilingual BERT model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.7069374 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | -------------------------------------------------------------------------------- /docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ---------------------------------------------------- 3 | 4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PretrainedConfig`` 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | .. autoclass:: transformers.PretrainedConfig 10 | :members: 11 | -------------------------------------------------------------------------------- /model_cards/jannesg/bertsson/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: swedish 3 | --- 4 | 5 | # BERTSSON Models 6 | 7 | The models are trained on: 8 | - Government Text 9 | - Swedish Literature 10 | - Swedish News 11 | 12 | Corpus size: Roughly 6B tokens. 13 | 14 | The following models are currently available: 15 | 16 | - **bertsson** - A BERT base model trained with the same hyperparameters as first published by Google. 17 | 18 | All models are cased and trained with whole word masking. 19 | 20 | Stay tuned for evaluations.
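As a rough usage sketch (the model name comes from this card's path and the example sentence is purely illustrative), the checkpoint can be queried for masked-token predictions:

```python
from transformers import pipeline

# Load the published BERT checkpoint and its tokenizer from the model hub
fill_mask = pipeline("fill-mask", model="jannesg/bertsson", tokenizer="jannesg/bertsson")

# [MASK] is BERT's mask token; the Swedish prompt here is only an example
print(fill_mask("Stockholm är huvudstaden i [MASK]."))
```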
21 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | # How to add a new example script in 🤗Transformers 2 | 3 | This folder provides a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library. 4 | 5 | Currently, only examples for PyTorch are provided. They are adaptations of the library's SQuAD examples and implement single-GPU and distributed training with gradient accumulation and mixed precision (using NVIDIA's apex library) to cover a reasonable range of use cases. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-adapter-setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31F New adapter setup" 3 | about: Submit a proposal/request to implement a new adapter setup or to add a new model 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟 New adapter setup 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them, if possible by @gh-username) 21 | -------------------------------------------------------------------------------- /adapter_docs/classes/model_mixins.rst: -------------------------------------------------------------------------------- 1 | Model Mixins 2 | ======================= 3 | 4 | These classes provide the basis for integrating adapter modules into model classes, including functionality such as adapter saving and loading. 5 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class. 6 | 7 | ModelAdaptersMixin 8 | ------------------ 9 | 10 | .. autoclass:: transformers.ModelAdaptersMixin 11 | :members: 12 | 13 | ModelWithHeadsAdaptersMixin 14 | --------------------------- 15 | 16 | .. autoclass:: transformers.ModelWithHeadsAdaptersMixin 17 | :members: 18 | -------------------------------------------------------------------------------- /examples/summarization/bart/run_train.sh: -------------------------------------------------------------------------------- 1 | export OUTPUT_DIR_NAME=bart_sum 2 | export CURRENT_DIR=${PWD} 3 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 4 | 5 | # Make output directory if it doesn't exist 6 | mkdir -p $OUTPUT_DIR 7 | 8 | # Add parent directory to python path to access lightning_base.py 9 | export PYTHONPATH="../../":"${PYTHONPATH}" 10 | 11 | python finetune.py \ 12 | --data_dir=./cnn-dailymail/cnn_dm \ 13 | --model_name_or_path=bart-large \ 14 | --learning_rate=3e-5 \ 15 | --train_batch_size=4 \ 16 | --eval_batch_size=4 \ 17 | --output_dir=$OUTPUT_DIR \ 18 | --do_train $@ 19 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased 7 | 8 | RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on the Russian part of Wikipedia and news data.
We used this training data to build a vocabulary of Russian subtokens and took a multilingual version of BERT‑base as an initialization for RuBERT\[1\]. 9 | 10 | 11 | \[1\]: Kuratov, Y., Arkhipov, M. \(2019\). Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language. arXiv preprint [arXiv:1905.07213](https://arxiv.org/abs/1905.07213). 12 | -------------------------------------------------------------------------------- /tests/fixtures/hub-index.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "t": { 3 | "s": { 4 | "default": "path/to/default", 5 | "bb1c8efb82510bed": { 6 | "default": "path/to/pfeiffer/default", 7 | "versions": { 8 | "example-org": "path/to/pfeiffer/example-org", 9 | "ukp": "path/to/pfeiffer/ukp" 10 | } 11 | }, 12 | "b1017368d7a97b11": { 13 | "versions": { 14 | "example-org": "path/to/houlsby/example-org" 15 | } 16 | } 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 9 | -------------------------------------------------------------------------------- /examples/text-generation/README.md: -------------------------------------------------------------------------------- 1 | ## Language generation 2 | 3 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py). 4 | 5 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 6 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you 7 | can try out the different models available in the library. 8 | 9 | Example usage: 10 | 11 | ```bash 12 | python run_generation.py \ 13 | --model_type=gpt2 \ 14 | --model_name_or_path=gpt2 15 | ``` 16 | -------------------------------------------------------------------------------- /src/transformers/trainer_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, NamedTuple, Optional 2 | 3 | import numpy as np 4 | 5 | 6 | class EvalPrediction(NamedTuple): 7 | """ 8 | Evaluation output (always contains labels), to be used 9 | to compute metrics.
10 | """ 11 | 12 | predictions: np.ndarray 13 | label_ids: np.ndarray 14 | 15 | 16 | class PredictionOutput(NamedTuple): 17 | predictions: np.ndarray 18 | label_ids: Optional[np.ndarray] 19 | metrics: Optional[Dict[str, float]] 20 | 21 | 22 | class TrainOutput(NamedTuple): 23 | global_step: int 24 | training_loss: float 25 | 26 | 27 | PREFIX_CHECKPOINT_DIR = "checkpoint" 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /deploy_multi_version_doc.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ ! -z "$2" ] 7 | then 8 | echo "Pushing version" $2 9 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 10 | else 11 | echo "Pushing master" 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 13 | fi 14 | } 15 | 16 | deploy_doc "master" 17 | deploy_doc "b33a385" v1.0.0 18 | deploy_doc "fe02e45" v1.1.0 19 | deploy_doc "89fd345" v1.2.0 20 | deploy_doc "fc9faa8" v2.0.0 21 | deploy_doc "3ddce1d" v2.1.1 22 | deploy_doc "f2f3294" v2.2.0 23 | deploy_doc "d0f8b9a" v2.3.0 24 | -------------------------------------------------------------------------------- /model_cards/lvwerra/bert-imdb/README.md: -------------------------------------------------------------------------------- 1 | # BERT-IMDB 2 | 3 | ## What is it? 4 | BERT (`bert-large-cased`) trained for sentiment classification on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). 5 | 6 | ## Training setting 7 | 8 | The model was trained on 80% of the IMDB dataset for sentiment classification for three epochs with a learning rate of `1e-5` with the `simpletransformers` library. The library uses a learning rate schedule. 9 | 10 | ## Result 11 | The model achieved 90% classification accuracy on the validation set. 12 | 13 | ## Reference 14 | The full experiment is available in the [tlr repo](https://lvwerra.github.io/trl/03-bert-imdb-training/). 15 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-bg-cs-pl-ru-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - bulgarian 4 | - czech 5 | - polish 6 | - russian 7 | --- 8 | 9 | # bert-base-bg-cs-pl-ru-cased 10 | 11 | SlavicBERT\[1\] \(Slavic \(bg, cs, pl, ru\), cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on Russian News and four Wikipedias: Bulgarian, Czech, Polish, and Russian. 
Subtoken vocabulary was built using this data. Multilingual BERT was used as an initialization for SlavicBERT. 12 | 13 | 14 | \[1\]: Arkhipov M., Trofimova M., Kuratov Y., Sorokin A. \(2019\). [Tuning Multilingual Transformers for Language-Specific Named Entity Recognition](https://www.aclweb.org/anthology/W19-3712/). ACL anthology W19-3712. 15 | -------------------------------------------------------------------------------- /adapter_docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* The search field on top of the toc tree */ 2 | /* Mobile header */ 3 | .wy-side-nav-search, .wy-nav-top { 4 | background: #39B3C6; 5 | } 6 | /* toc tree text */ 7 | .wy-menu-vertical header, 8 | .wy-menu-vertical p.caption { 9 | color: #39B3C6 10 | } 11 | /* toc tree activated link */ 12 | .wy-menu-vertical a:active { 13 | background-color:#39B3C6; 14 | } 15 | /* Links */ 16 | a { 17 | color: #39B3C6 18 | } 19 | /* Source spans */ 20 | .rst-content .viewcode-link, .rst-content .viewcode-back{ 21 | color: #39B3C6; 22 | } 23 | /* The literal code blocks */ 24 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 25 | color: #39B3C6; 26 | } 27 | -------------------------------------------------------------------------------- /adapter_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 
24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /adapter_docs/README.md: -------------------------------------------------------------------------------- 1 | # The adapter-transformers documentation 2 | 3 | This is the documentation of the adapter-related parts of the transformers library and the Adapter-Hub. Huggingface's documentation of the base library is located in the `/docs` folder. 4 | 5 | ## Installing & Building 6 | 7 | Building the documentation requires some additional packages installed. You can install them by running the following command in the root folder: 8 | 9 | ```bash 10 | pip install -e ".[docs]" 11 | ``` 12 | 13 | Cleaning and regenerating the documentation files can be done using `sphinx` by running the following command in the `/adapter_docs` folder: 14 | 15 | ```bash 16 | make clean && make html 17 | ``` 18 | 19 | The build output will be located in `/adapter_docs/_build/html`. 
20 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /examples/text-generation/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClassificationHead(torch.nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size)) 13 | self.mlp = torch.nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = F.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/contrib/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/contrib/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 
25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | ensure_newline_before_comments = True 3 | force_grid_wrap = 0 4 | include_trailing_comma = True 5 | known_first_party = transformers 6 | known_third_party = 7 | absl 8 | fairseq 9 | fastprogress 10 | git 11 | h5py 12 | MeCab 13 | nltk 14 | numpy 15 | packaging 16 | PIL 17 | psutil 18 | pytorch_lightning 19 | rouge_score 20 | sacrebleu 21 | seqeval 22 | sklearn 23 | tensorboardX 24 | tensorflow 25 | tensorflow_datasets 26 | timeout_decorator 27 | torch 28 | torchtext 29 | torchvision 30 | torch_xla 31 | tqdm 32 | 33 | line_length = 119 34 | lines_after_imports = 2 35 | multi_line_output = 3 36 | use_parentheses = True 37 | 38 | [flake8] 39 | ignore = E203, E501, E741, W503 40 | max-line-length = 119 41 | -------------------------------------------------------------------------------- /model_cards/julien-c/bert-xsmall-dummy/README.md: -------------------------------------------------------------------------------- 1 | ## How to build a dummy model 2 | 3 | 4 | ```python 5 | from transformers.configuration_bert import BertConfig 6 | from transformers.modeling_bert import BertForMaskedLM 7 | from transformers.modeling_tf_bert import TFBertForMaskedLM 8 | from transformers.tokenization_bert import BertTokenizer 9 | 10 | 11 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy" 12 | DIRNAME = "./bert-xsmall-dummy" 13 | 14 | config = BertConfig(10, 20, 1, 1, 40) 15 | 16 | model = BertForMaskedLM(config) 17 | model.save_pretrained(DIRNAME) 18 | 19 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True) 20 | tf_model.save_pretrained(DIRNAME) 21 | 22 | # Slightly different for tokenizer. 23 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME) 24 | # tokenizer.save_pretrained() 25 | ``` 26 | -------------------------------------------------------------------------------- /model_cards/spentaur/yelp/README.md: -------------------------------------------------------------------------------- 1 | # DistilBERT Yelp Review Sentiment 2 | This model is used for sentiment analysis on English Yelp reviews. 3 | It is a DistilBERT model trained on 1 million reviews from the Yelp Open Dataset. 4 | It is a regression model, with outputs in the range of ~-2 to ~2, where -2 corresponds to 1 star and 2 corresponds to 5 stars. 5 | It was trained using the [ktrain](https://github.com/amaiya/ktrain) library because of its ease of use. 6 | 7 | Example use: 8 | 9 | ``` 10 | tokenizer = AutoTokenizer.from_pretrained( 11 | 'distilbert-base-uncased', use_fast=True) 12 | model = TFAutoModelForSequenceClassification.from_pretrained( 13 | "spentaur/yelp") 14 | 15 | review = "This place is great!" 16 | input_ids = tokenizer.encode(review, return_tensors='tf') 17 | pred = model(input_ids)[0][0][0].numpy() 18 | # pred should be ~1.9562385 19 | ``` 20 | -------------------------------------------------------------------------------- /.circleci/deploy.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ !
-z "$2" ] 7 | then 8 | if [ -d "$dir/$2" ]; then 9 | echo "Directory" $2 "already exists" 10 | else 11 | echo "Pushing version" $2 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 13 | fi 14 | else 15 | echo "Pushing master" 16 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 17 | fi 18 | } 19 | 20 | deploy_doc "master" 21 | deploy_doc "b33a385" v1.0.0 22 | deploy_doc "fe02e45" v1.1.0 23 | deploy_doc "89fd345" v1.2.0 24 | deploy_doc "fc9faa8" v2.0.0 25 | deploy_doc "3ddce1d" v2.1.1 26 | deploy_doc "3616209" v2.2.0 27 | deploy_doc "d0f8b9a" v2.3.0 28 | deploy_doc "6664ea9" v2.4.0 29 | deploy_doc "fb560dc" v2.5.0 30 | -------------------------------------------------------------------------------- /adapter_docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Our *adapter-transformers* package is a drop-in replacement for Huggingface's *transformers* library. As the original package, it is tested on Python 3.6+ and PyTorch 1.1.0+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first. 4 | 5 | ## Using pip (from GitHub) 6 | 7 | The simplest way of installation is by using pip to install the package from our GitHub repository: 8 | 9 | ``` 10 | pip install git+https://github.com/adapter-hub/adapter-transformers.git 11 | ``` 12 | 13 | ## From repository 14 | 15 | Alternatively, you can clone the repository first and install the package from source. 16 | This allows you to run the included example scripts: 17 | 18 | ``` 19 | git clone https://github.com/adapter-hub/adapter-transformers.git 20 | cd adapter-transformers 21 | pip install . 22 | ``` 23 | -------------------------------------------------------------------------------- /src/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .metrics import is_sklearn_available 6 | from .processors import ( 7 | DataProcessor, 8 | InputExample, 9 | InputFeatures, 10 | SingleSentenceClassificationProcessor, 11 | SquadExample, 12 | SquadFeatures, 13 | SquadV1Processor, 14 | SquadV2Processor, 15 | glue_convert_examples_to_features, 16 | glue_output_modes, 17 | glue_processors, 18 | glue_tasks_num_labels, 19 | squad_convert_examples_to_features, 20 | xnli_output_modes, 21 | xnli_processors, 22 | xnli_tasks_num_labels, 23 | ) 24 | 25 | 26 | if is_sklearn_available(): 27 | from .metrics import glue_compute_metrics, xnli_compute_metrics 28 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB 2 | 3 | ## What is it? 4 | A GPT2 (`gpt2`) language model fine-tuned on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). 5 | 6 | ## Training setting 7 | 8 | The GPT2 language model was fine-tuned for 1 epoch on the IMDB dataset. 
All comments were joined into a single text file separated by the EOS token: 9 | 10 | ``` 11 | import pandas as pd 12 | df = pd.read_csv("imdb-dataset.csv") 13 | imdb_str = " <|endoftext|> ".join(df['review'].tolist()) 14 | 15 | with open('imdb.txt', 'w') as f: 16 | f.write(imdb_str) 17 | ``` 18 | 19 | To train the model, the `run_language_modeling.py` script from the `transformers` library was used: 20 | 21 | ``` 22 | python run_language_modeling.py 23 | --train_data_file imdb.txt 24 | --output_dir gpt2-imdb 25 | --model_type gpt2 26 | --model_name_or_path gpt2 27 | ``` 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Submit a proposal/request for a new adapter-transformers feature 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🚀 Feature request 11 | 12 | 14 | 15 | ## Motivation 16 | 17 | 20 | 21 | ## Your contribution 22 | 23 | 26 | -------------------------------------------------------------------------------- /adapter_docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096-extra.pos.embd.only/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096-extra.pos.embd.only 3 | 4 | This model is similar to `longformer-base-4096`, but it was pretrained to preserve RoBERTa weights by freezing all RoBERTa weights and training only the additional position embeddings. 5 | 6 | 7 | ### Citing 8 | 9 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 10 | ``` 11 | @article{Beltagy2020Longformer, 12 | title={Longformer: The Long-Document Transformer}, 13 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 14 | journal={arXiv:2004.05150}, 15 | year={2020}, 16 | } 17 | ``` 18 | 19 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 20 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.
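As an illustration (this snippet is not part of the original model card), the checkpoint can be loaded with the standard Auto classes of the `transformers` library; this is a minimal sketch, assuming the hub id matches the card path:

```python
from transformers import AutoModel, AutoTokenizer

# Hypothetical usage sketch for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")
model = AutoModel.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")

input_ids = tokenizer.encode("A long document ...", return_tensors="pt")
last_hidden_state = model(input_ids)[0]  # (batch_size, sequence_length, hidden_size)
```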
21 | -------------------------------------------------------------------------------- /docs/source/main_classes/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to: 7 | 8 | - resize the input token embeddings when new tokens are added to the vocabulary 9 | - prune the attention heads of the model. 10 | 11 | ``PreTrainedModel`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.PreTrainedModel 15 | :members: 16 | 17 | ``Helper Functions`` 18 | ~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autofunction:: transformers.apply_chunking_to_forward 21 | 22 | 23 | ``TFPreTrainedModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.TFPreTrainedModel 27 | :members: 28 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased-conversational 7 | 8 | Conversational RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on OpenSubtitles\[1\], [Dirty](https://d3.ru/), [Pikabu](https://pikabu.ru/), and a Social Media segment of Taiga corpus\[2\]. We assembled a new vocabulary for Conversational RuBERT model on this data and initialized the model with [RuBERT](../rubert-base-cased). 9 | 10 | 11 | \[1\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 12 | 13 | \[2\]: Shavrina T., Shapovalova O. \(2017\) TO THE METHODOLOGY OF CORPUS CONSTRUCTION FOR MACHINE LEARNING: «TAIGA» SYNTAX TREE CORPUS AND PARSER. in proc. of “CORPORA2017”, international conference , Saint-Petersbourg, 2017. 
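For illustration (this snippet is not part of the original card), the model can be loaded with the standard Auto classes of the `transformers` library, assuming the hub id matches the card path:

```python
from transformers import AutoModel, AutoTokenizer

# Hypothetical usage sketch for DeepPavlov/rubert-base-cased-conversational.
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased-conversational")
model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased-conversational")

input_ids = tokenizer.encode("привет, как дела?", return_tensors="pt")
last_hidden_state = model(input_ids)[0]
```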
14 | -------------------------------------------------------------------------------- /tests/test_adapter_saving.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import ADAPTER_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel 4 | 5 | from .utils import require_torch 6 | 7 | 8 | @require_torch 9 | class AdapterModelTest(unittest.TestCase): 10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel] 11 | 12 | def test_model_config_serialization(self): 13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict 14 | 15 | See, e.g., PretrainedConfig.to_json_string() 16 | """ 17 | for model_class in self.model_classes: 18 | for k, v in ADAPTER_CONFIG_MAP.items(): 19 | model_config = model_class.config_class 20 | model = model_class(model_config()) 21 | model.add_adapter("test", adapter_type=AdapterType.text_task, config=v) 22 | # should not raise an exception 23 | model.config.to_json_string() 24 | -------------------------------------------------------------------------------- /.github/workflows/adapter_docs_build.yml: -------------------------------------------------------------------------------- 1 | name: Build Adapter Docs 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths: [ 'adapter_docs/**' ] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | submodules: recursive 15 | - uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.6 18 | - name: Install 19 | run: | 20 | pip install .[tf,torch,docs] 21 | - name: Build 22 | run: | 23 | cd adapter_docs && make html && cd .. 24 | - name: Deploy 25 | uses: peaceiris/actions-gh-pages@v3 26 | with: 27 | github_token: ${{ secrets.GITHUB_TOKEN }} 28 | user_name: "Adapter-Hub-Bert" 29 | user_email: "---" 30 | publish_dir: ./adapter_docs/_build/html 31 | publish_branch: gh-pages 32 | force_orphan: true 33 | cname: docs.adapterhub.ml 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test test-examples 2 | 3 | # Check that source code meets quality standards 4 | 5 | quality: 6 | black --check --line-length 119 --target-version py35 examples templates tests src utils 7 | isort --check-only --recursive examples templates tests src utils 8 | flake8 examples templates tests src utils 9 | 10 | # Format source code automatically 11 | 12 | style: 13 | black --line-length 119 --target-version py35 examples templates tests src utils 14 | isort --recursive examples templates tests src utils 15 | 16 | # Run tests for the library 17 | 18 | test: 19 | python -m pytest -n auto --dist=loadfile -s -v ./tests/ 20 | 21 | test-reduced: 22 | python -m pytest -n auto --dist=loadfile -s -v\ 23 | --ignore-glob='tests/test_tokenization*'\ 24 | --ignore-glob='tests/test_pipelines*'\ 25 | --ignore-glob='tests/test_hf*'\ 26 | --ignore-glob='tests/test_doc*'\ 27 | ./tests/ 28 | 29 | # Run tests for examples 30 | 31 | test-examples: 32 | python -m pytest -n auto --dist=loadfile -s -v ./examples/ 33 | -------------------------------------------------------------------------------- /model_cards/codegram/calbert-base-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: catalan 3 | --- 4 | 5 | # CALBERT: a Catalan Language Model 6 | 7 | ## Introduction 8 | 
9 | CALBERT is an open-source language model for Catalan based on the ALBERT architecture. 10 | 11 | It is now available on Hugging Face in its `base-uncased` version, and was pretrained on the [OSCAR dataset](https://traces1.inria.fr/oscar/). 12 | 13 | For further information or requests, please go to the [GitHub repository](https://github.com/codegram/calbert). 14 | 15 | ## Pre-trained models 16 | 17 | | Model | Arch. | Training data | 18 | |-------------------------------------|------------------|-----------------------------------| 19 | | `codegram` / `calbert-base-uncased` | Base (uncased) | OSCAR (4.3 GB of text) | 20 | 21 | 22 | ## Authors 23 | 24 | CALBERT was trained and evaluated by [Txus Bach](https://twitter.com/txustice), as part of [Codegram](https://www.codegram.com)'s applied research. 25 | 26 | -------------------------------------------------------------------------------- /examples/summarization/bart/run_train_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and utils.py 16 | export PYTHONPATH="../../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_type=bart \ 20 | --model_name_or_path=sshleifer/bart-tiny-random \ 21 | --learning_rate=3e-5 \ 22 | --train_batch_size=2 \ 23 | --eval_batch_size=2 \ 24 | --output_dir=$OUTPUT_DIR \ 25 | --num_train_epochs=1 \ 26 | --n_gpu=0 \ 27 | --do_train $@ 28 | 29 | rm -rf cnn_tiny 30 | rm -rf $OUTPUT_DIR 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased-sentence 7 | 8 | Sentence RuBERT \(Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters\) is a representation‑based sentence encoder for Russian. It is initialized with RuBERT and fine‑tuned on SNLI\[1\] Google-translated to Russian and on the Russian part of the XNLI dev set\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 9 | 10 | 11 | \[1\]: S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. \(2015\) A large annotated corpus for learning natural language inference. arXiv preprint [arXiv:1508.05326](https://arxiv.org/abs/1508.05326) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks.
arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import is_torch_available 4 | 5 | from .utils import require_torch 6 | 7 | 8 | if is_torch_available(): 9 | from transformers.activations import _gelu_python, get_activation, gelu_new 10 | import torch 11 | 12 | 13 | @require_torch 14 | class TestActivations(unittest.TestCase): 15 | def test_gelu_versions(self): 16 | x = torch.Tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) 17 | torch_builtin = get_activation("gelu") 18 | self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item()) 19 | self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item()) 20 | 21 | def test_get_activation(self): 22 | get_activation("swish") 23 | get_activation("relu") 24 | get_activation("tanh") 25 | get_activation("gelu_new") 26 | get_activation("gelu_fast") 27 | with self.assertRaises(KeyError): 28 | get_activation("bogus") 29 | with self.assertRaises(KeyError): 30 | get_activation(None) 31 | -------------------------------------------------------------------------------- /src/transformers/configuration_marian.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The OPUS-NMT Team, Marian team, and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Marian model configuration """ 16 | 17 | from .configuration_bart import BartConfig 18 | 19 | 20 | PRETRAINED_CONFIG_ARCHIVE_MAP = { 21 | "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json", 22 | } 23 | 24 | 25 | class MarianConfig(BartConfig): 26 | model_type = "marian" 27 | -------------------------------------------------------------------------------- /model_cards/clue/xlnet_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## xlnet_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** xlnet-large 10 | **Model size:** 1.3G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 
18 | 19 | ### Usage 20 | 21 | ``` 22 | import torch 23 | from transformers import XLNetTokenizer,XLNetModel 24 | tokenizer = XLNetTokenizer.from_pretrained("clue/xlnet_chinese_large") 25 | xlnet = XLNetModel.from_pretrained("clue/xlnet_chinese_large") 26 | ``` 27 | 28 | ### About CLUE benchmark 29 | 30 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 31 | 32 | Github: https://github.com/CLUEbenchmark 33 | Website: https://www.cluebenchmarks.com/ 34 | -------------------------------------------------------------------------------- /adapter_docs/classes/weights_loaders.rst: -------------------------------------------------------------------------------- 1 | Weights Loaders 2 | ======================= 3 | 4 | These classes perform the extraction, saving and loading of module weights to and from the file system. 5 | All type-specific loader classes inherit from the common ``WeightsLoader`` base class which can also be extended 6 | to add support for additional custom modules. 7 | 8 | These classes provide the basis of adapter module integration into model classes such as adapter saving and loading. 9 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class. 10 | 11 | WeightsLoader 12 | ------------------ 13 | 14 | .. autoclass:: transformers.WeightsLoader 15 | :members: 16 | 17 | AdapterLoader 18 | --------------------------- 19 | 20 | .. autoclass:: transformers.AdapterLoader 21 | :members: 22 | 23 | PredictionHeadLoader 24 | --------------------------- 25 | 26 | .. autoclass:: transformers.PredictionHeadLoader 27 | :members: 28 | 29 | WeightsLoaderHelper 30 | ------------------- 31 | 32 | .. autoclass:: transformers.WeightsLoaderHelper 33 | :members: 34 | -------------------------------------------------------------------------------- /model_cards/ViktorAlm/electra-base-norwegian-uncased-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: norwegian 3 | thumbnail: https://i.imgur.com/QqSEC5I.png 4 | --- 5 | 6 | # Norwegian Electra 7 | ![Image of norwegian electra](https://i.imgur.com/QqSEC5I.png) 8 | 9 | Trained on Oscar + wikipedia + opensubtitles + some other data I had with the awesome power of TPUs(V3-8) 10 | 11 | Use with caution. I have no downstream tasks in Norwegian to test on so I have no idea of its performance yet. 12 | # Model 13 | ## Electra: Pre-training Text Encoders as Discriminators Rather Than Generators 14 | Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning 15 | - https://openreview.net/pdf?id=r1xMH1BtvB 16 | - https://github.com/google-research/electra 17 | # Acknowledgments 18 | ### TensorFlow Research Cloud 19 | Research supported with Cloud TPUs from Google's TensorFlow Research Cloud (TFRC). 
Thanks for providing access to the TFRC ❤️ 20 | - https://www.tensorflow.org/tfrc 21 | #### OSCAR corpus 22 | - https://oscar-corpus.com/ 23 | #### OPUS 24 | - http://opus.nlpl.eu/ 25 | - http://www.opensubtitles.org/ 26 | -------------------------------------------------------------------------------- /src/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | 6 | from transformers.file_utils import WEIGHTS_NAME 7 | 8 | 9 | DIALOGPT_MODELS = ["small", "medium", "large"] 10 | 11 | OLD_KEY = "lm_head.decoder.weight" 12 | NEW_KEY = "lm_head.weight" 13 | 14 | 15 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str): 16 | d = torch.load(checkpoint_path) 17 | d[NEW_KEY] = d.pop(OLD_KEY) 18 | os.makedirs(pytorch_dump_folder_path, exist_ok=True) 19 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)) 20 | 21 | 22 | if __name__ == "__main__": 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--dialogpt_path", default=".", type=str) 25 | args = parser.parse_args() 26 | for MODEL in DIALOGPT_MODELS: 27 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") 28 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" 29 | convert_dialogpt_checkpoint( 30 | checkpoint_path, pytorch_dump_folder_path, 31 | ) 32 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-multilingual-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - multilingual 4 | --- 5 | 6 | # bert-base-multilingual-cased-sentence 7 | 8 | Sentence Multilingual BERT \(101 languages, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) is a representation‑based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine‑tuned on English MultiNLI\[1\] and on the dev set of multilingual XNLI\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\].
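To make the mean-pooling recipe concrete, here is a small sketch (an illustrative addition, not from the original card); it assumes the checkpoint loads through the standard `transformers` Auto classes:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/bert-base-multilingual-cased-sentence")
model = AutoModel.from_pretrained("DeepPavlov/bert-base-multilingual-cased-sentence")

inputs = tokenizer.encode_plus("A sentence to embed.", return_tensors="pt")
token_embeddings = model(**inputs)[0]                  # (1, seq_len, hidden_size)
mask = inputs["attention_mask"].unsqueeze(-1).float()  # (1, seq_len, 1)
sentence_embedding = (token_embeddings * mask).sum(1) / mask.sum(1)
```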
arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /tests/test_adapter_fusion_saving.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel 4 | 5 | from .utils import require_torch 6 | 7 | 8 | @require_torch 9 | class AdapterFusionModelTest(unittest.TestCase): 10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel] 11 | 12 | def test_model_config_serialization(self): 13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict 14 | 15 | See, e.g., PretrainedConfig.to_json_string() 16 | """ 17 | for model_class in self.model_classes: 18 | for k, v in ADAPTERFUSION_CONFIG_MAP.items(): 19 | model_config = model_class.config_class 20 | model = model_class(model_config()) 21 | model.add_adapter("test1", AdapterType.text_task) 22 | model.add_adapter("test2", AdapterType.text_task) 23 | model.add_fusion(["test1", "test2"], adapter_fusion_config=v) 24 | # should not raise an exception 25 | model.config.to_json_string() 26 | -------------------------------------------------------------------------------- /examples/token-classification/test_ner_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import unittest 4 | from unittest.mock import patch 5 | 6 | import run_ner 7 | 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | logger = logging.getLogger() 12 | 13 | 14 | class ExamplesTests(unittest.TestCase): 15 | def test_run_ner(self): 16 | stream_handler = logging.StreamHandler(sys.stdout) 17 | logger.addHandler(stream_handler) 18 | 19 | testargs = """ 20 | --model_name distilbert-base-german-cased 21 | --output_dir ./tests/fixtures/tests_samples/temp_dir 22 | --overwrite_output_dir 23 | --data_dir ./tests/fixtures/tests_samples/GermEval 24 | --labels ./tests/fixtures/tests_samples/GermEval/labels.txt 25 | --max_seq_length 128 26 | --num_train_epochs 6 27 | --logging_steps 1 28 | --do_train 29 | --do_eval 30 | """.split() 31 | with patch.object(sys, "argv", ["run.py"] + testargs): 32 | result = run_ner.main() 33 | self.assertLess(result["eval_loss"], 1.5) 34 | -------------------------------------------------------------------------------- /model_cards/deepset/bert-base-german-cased-oldvocab/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | thumbnail: https://static.tildacdn.com/tild6438-3730-4164-b266-613634323466/german_bert.png 4 | tags: 5 | - exbert 6 | --- 7 | 8 | 9 | 10 | 11 | 12 | # German BERT with old vocabulary 13 | For details see the related [FARM issue](https://github.com/deepset-ai/FARM/issues/60). 14 | 15 | 16 | ## About us 17 | ![deepset logo](https://raw.githubusercontent.com/deepset-ai/FARM/master/docs/img/deepset_logo.png) 18 | 19 | We bring NLP to the industry via open source! 20 | Our focus: Industry specific language models & large scale QA systems. 
21 | 22 | Some of our work: 23 | - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert) 24 | - [FARM](https://github.com/deepset-ai/FARM) 25 | - [Haystack](https://github.com/deepset-ai/haystack/) 26 | 27 | Get in touch: 28 | [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Website](https://deepset.ai) 29 | -------------------------------------------------------------------------------- /docs/source/model_doc/encoderdecoder.rst: -------------------------------------------------------------------------------- 1 | Encoder Decoder Models 2 | ----------- 3 | 4 | This class can wrap an encoder model, such as ``BertModel``, and a decoder model with a language modeling head, such as ``BertForMaskedLM``, into an encoder-decoder model. 5 | 6 | The ``EncoderDecoderModel`` class allows instantiating an encoder-decoder model from a pretrained encoder and a pretrained decoder using the ``from_encoder_decoder_pretrained`` class method. 7 | The ``EncoderDecoderModel`` is saved using the standard ``save_pretrained()`` method and can be loaded again using the standard ``from_pretrained()`` method. 8 | 9 | An application of this architecture could be *summarization* using two pretrained BERT models, as shown in the paper: `Text Summarization with Pretrained Encoders `_ by Yang Liu and Mirella Lapata. 10 | 11 | 12 | ``EncoderDecoderConfig`` 13 | ~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.EncoderDecoderConfig 16 | :members: 17 | 18 | 19 | ``EncoderDecoderModel`` 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.EncoderDecoderModel 23 | :members: 24 | -------------------------------------------------------------------------------- /examples/text-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | # Install newest ptl. 2 | pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/ 3 | # Install example requirements 4 | pip install -r ../requirements.txt 5 | 6 | # Download glue data 7 | python3 ../../utils/download_glue_data.py 8 | 9 | export TASK=mrpc 10 | export DATA_DIR=./glue_data/MRPC/ 11 | export MAX_LENGTH=128 12 | export LEARNING_RATE=2e-5 13 | export BERT_MODEL=bert-base-cased 14 | export BATCH_SIZE=32 15 | export NUM_EPOCHS=3 16 | export SEED=2 17 | export OUTPUT_DIR_NAME=mrpc-pl-bert 18 | export CURRENT_DIR=${PWD} 19 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 20 | 21 | # Make output directory if it doesn't exist 22 | mkdir -p $OUTPUT_DIR 23 | # Add parent directory to python path to access lightning_base.py 24 | export PYTHONPATH="../":"${PYTHONPATH}" 25 | 26 | python3 run_pl_glue.py --data_dir $DATA_DIR \ 27 | --task $TASK \ 28 | --model_name_or_path $BERT_MODEL \ 29 | --output_dir $OUTPUT_DIR \ 30 | --max_seq_length $MAX_LENGTH \ 31 | --learning_rate $LEARNING_RATE \ 32 | --num_train_epochs $NUM_EPOCHS \ 33 | --train_batch_size $BATCH_SIZE \ 34 | --seed $SEED \ 35 | --do_train \ 36 | --do_predict 37 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute.
A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750 11 | -------------------------------------------------------------------------------- /model_cards/canwenxu/BERT-of-Theseus-MNLI/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | thumbnail: https://raw.githubusercontent.com/JetRunner/BERT-of-Theseus/master/bert-of-theseus.png 3 | --- 4 | 5 | # BERT-of-Theseus 6 | See our paper ["BERT-of-Theseus: Compressing BERT by Progressive Module Replacing"](http://arxiv.org/abs/2002.02925). 7 | 8 | BERT-of-Theseus is a new compressed BERT by progressively replacing the components of the original BERT. 9 | 10 | ![BERT of Theseus](https://github.com/JetRunner/BERT-of-Theseus/blob/master/bert-of-theseus.png?raw=true) 11 | 12 | ## Load Pretrained Model on MNLI 13 | 14 | We provide a 6-layer pretrained model on MNLI as a general-purpose model, which can transfer to other sentence classification tasks, outperforming DistillBERT (with the same 6-layer structure) on six tasks of GLUE (dev set). 
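For illustration (this snippet is not part of the original card), a minimal loading sketch, assuming the hub repo ships a compatible tokenizer and works with the standard Auto classes for sequence classification; the GLUE dev-set comparison follows:

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hypothetical usage sketch for canwenxu/BERT-of-Theseus-MNLI.
tokenizer = AutoTokenizer.from_pretrained("canwenxu/BERT-of-Theseus-MNLI")
model = AutoModelForSequenceClassification.from_pretrained("canwenxu/BERT-of-Theseus-MNLI")
```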
15 | 16 | | Method | MNLI | MRPC | QNLI | QQP | RTE | SST-2 | STS-B | 17 | |-----------------|------|------|------|------|------|-------|-------| 18 | | BERT-base | 83.5 | 89.5 | 91.2 | 89.8 | 71.1 | 91.5 | 88.9 | 19 | | DistillBERT | 79.0 | 87.5 | 85.3 | 84.9 | 59.9 | 90.7 | 81.2 | 20 | | BERT-of-Theseus | 82.1 | 87.5 | 88.8 | 88.8 | 70.1 | 91.8 | 87.8 | 21 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## roberta_chinese_base 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-base 10 | **Model size:** 392M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_base") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_base") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## roberta_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-large 10 | **Model size:** 1.2G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_large") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_large") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/surajp/albert-base-sanskrit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sanskrit 3 | --- 4 | 5 | 6 | # ALBERT-base-Sanskrit 7 | 8 | 9 | Explanation Notebook Colab: [SanskritALBERT.ipynb](https://colab.research.google.com/github/parmarsuraj99/suraj-parmar/blob/master/_notebooks/2020-05-02-SanskritALBERT.ipynb) 10 | 11 | Size of the model is **46MB** 12 | 13 | Example of usage: 14 | 15 | ``` 16 | tokenizer = AutoTokenizer.from_pretrained("surajp/albert-base-sanskrit") 17 | model = AutoModel.from_pretrained("surajp/albert-base-sanskrit") 18 | 19 | enc=tokenizer.encode("ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥") 20 | print(tokenizer.decode(enc)) 21 | 22 | ps = model(torch.tensor(enc).unsqueeze(1)) 23 | print(ps[0].shape) 24 | ``` 25 | ``` 26 | ''' 27 | Output: 28 | -------- 29 | [CLS] ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥[SEP] 30 | torch.Size([28, 1, 768]) 31 | ``` 32 | 33 | 34 | > Created by [Suraj Parmar/@parmarsuraj99](https://twitter.com/parmarsuraj99) 35 | 36 | > Made with ❤️ in India 37 | -------------------------------------------------------------------------------- /examples/benchmarking/run_benchmark.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ Benchmarking the library on inference and training """ 17 | 18 | from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments 19 | 20 | 21 | def main(): 22 | parser = HfArgumentParser(PyTorchBenchmarkArguments) 23 | benchmark_args = parser.parse_args_into_dataclasses()[0] 24 | benchmark = PyTorchBenchmark(args=benchmark_args) 25 | benchmark.run() 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /model_cards/wptoux/albert-chinese-large-qa/README.md: -------------------------------------------------------------------------------- 1 | # albert-chinese-large-qa 2 | ALBERT large QA model pretrained on the Baidu WebQA and Baidu DuReader datasets. 3 | 4 | ## Data source 5 | + Baidu WebQA 1.0 6 | + Baidu DuReader 7 | 8 | ## Training Method 9 | We combined the two datasets and created a new dataset in SQuAD format, including 705139 samples for training and 69638 samples for validation. We fine-tuned the model based on the ALBERT Chinese large model.
11 | 12 | ## Hyperparams 13 | + learning_rate 1e-5 14 | + max_seq_length 512 15 | + max_query_length 50 16 | + max_answer_length 300 17 | + doc_stride 256 18 | + num_train_epochs 2 19 | + warmup_steps 1000 20 | + per_gpu_train_batch_size 8 21 | + gradient_accumulation_steps 3 22 | + n_gpu 2 (Nvidia Tesla P100) 23 | 24 | ## Usage 25 | ``` 26 | from transformers import AutoModelForQuestionAnswering, BertTokenizer 27 | 28 | model = AutoModelForQuestionAnswering.from_pretrained('wptoux/albert-chinese-large-qa') 29 | tokenizer = BertTokenizer.from_pretrained('wptoux/albert-chinese-large-qa') 30 | ``` 31 | ***Important: use BertTokenizer*** 32 | 33 | ## MoreInfo 34 | Please visit https://github.com/wptoux/albert-chinese-large-webqa for details. 35 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## albert_chinese_tiny 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-tiny 10 | **Model size:** 16M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_tiny` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_tiny") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_tiny") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_cased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
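As an illustrative addition (not part of the original card), the checkpoints can typically be loaded with the standard Auto classes, using the model ids implied by the card paths:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_cased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_cased")
```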
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## albert_chinese_small 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-small 10 | **Model size:** 18.5M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_small` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_small") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_small") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_uncased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/julien-c/EsperBERTo-small-pos/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: esperanto 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png 4 | --- 5 | 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto 7 | 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥 9 | 10 | ## Training Details 11 | 12 | - current checkpoint: 566000 13 | - machine name: `galinette` 14 | 15 | 16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png) 17 | 18 | ## Example pipeline 19 | 20 | ```python 21 | from transformers import TokenClassificationPipeline, pipeline 22 | 23 | 24 | MODEL_PATH = "./models/EsperBERTo-small-pos/" 25 | 26 | nlp = pipeline( 27 | "ner", 28 | model=MODEL_PATH, 29 | tokenizer=MODEL_PATH, 30 | ) 31 | # or instantiate a TokenClassificationPipeline directly. 32 | 33 | nlp("Mi estas viro kej estas tago varma.") 34 | 35 | # {'entity': 'PRON', 'score': 0.9979867339134216, 'word': ' Mi'} 36 | # {'entity': 'VERB', 'score': 0.9683094620704651, 'word': ' estas'} 37 | # {'entity': 'VERB', 'score': 0.9797462821006775, 'word': ' estas'} 38 | # {'entity': 'NOUN', 'score': 0.8509314060211182, 'word': ' tago'} 39 | # {'entity': 'ADJ', 'score': 0.9996201395988464, 'word': ' varma'} 40 | ``` -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - english 4 | --- 5 | 6 | # bert-base-cased-conversational 7 | 8 | Conversational BERT \(English, cased, 12‑layer, 768‑hidden, 12‑heads, 110M parameters\) was trained on the English part of Twitter, Reddit, DailyDialogues\[1\], OpenSubtitles\[2\], Debates\[3\], Blogs\[4\], Facebook News Comments. We used this training data to build the vocabulary of English subtokens and took English cased version of BERT‑base as an initialization for English Conversational BERT. 9 | 10 | 11 | \[1\]: Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. IJCNLP 2017. 12 | 13 | \[2\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 14 | 15 | \[3\]: Justine Zhang, Ravi Kumar, Sujith Ravi, Cristian Danescu-Niculescu-Mizil. Proceedings of NAACL, 2016. 16 | 17 | \[4\]: J. Schler, M. Koppel, S. Argamon and J. Pennebaker \(2006\). Effects of Age and Gender on Blogging in Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs. 
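A minimal usage sketch (the hub id `DeepPavlov/bert-base-cased-conversational` is taken from this model card's path; the utterance is made up):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/bert-base-cased-conversational")
model = AutoModel.from_pretrained("DeepPavlov/bert-base-cased-conversational")

# Contextual embeddings for a conversational utterance.
input_ids = tokenizer.encode("hey, how is it going?", return_tensors="pt")
outputs = model(input_ids)
print(outputs[0].shape)  # (batch_size, sequence_length, 768)
```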
18 | -------------------------------------------------------------------------------- /examples/summarization/t5/download_cnn_daily_mail.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import tensorflow_datasets as tfds 5 | 6 | 7 | def main(input_path, reference_path, data_dir): 8 | cnn_ds = tfds.load("cnn_dailymail", split="test", shuffle_files=False, data_dir=data_dir) 9 | cnn_ds_iter = tfds.as_numpy(cnn_ds) 10 | 11 | test_articles_file = Path(input_path).open("w") 12 | test_summaries_file = Path(reference_path).open("w") 13 | 14 | for example in cnn_ds_iter: 15 | test_articles_file.write(example["article"].decode("utf-8") + "\n") 16 | test_articles_file.flush() 17 | test_summaries_file.write(example["highlights"].decode("utf-8").replace("\n", " ") + "\n") 18 | test_summaries_file.flush() 19 | 20 | 21 | if __name__ == "__main__": 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("input_path", type=str, help="where to save the articles input data") 24 | parser.add_argument( 25 | "reference_path", type=str, help="where to save the reference summaries", 26 | ) 27 | parser.add_argument( 28 | "--data_dir", type=str, default="~/tensorflow_datasets", help="where to save the tensorflow datasets.", 29 | ) 30 | args = parser.parse_args() 31 | main(args.input_path, args.reference_path, args.data_dir) 32 | -------------------------------------------------------------------------------- /model_cards/illuin/camembert-base-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-fquad 6 | 7 | ## Description 8 | 9 | A native French Question Answering model [CamemBERT-base](https://camembert-model.fr/) fine-tuned on [FQuAD](https://fquad.illuin.tech/). 10 | 11 | ## Evaluation results 12 | 13 | On the development set. 14 | 15 | ```shell 16 | {"f1": 88.1, "exact_match": 78.1} 17 | ``` 18 | 19 | On the test set. 20 | 21 | ```shell 22 | {"f1": 88.3, "exact_match": 78.0} 23 | ``` 24 | 25 | ## Usage 26 | 27 | ```python 28 | from transformers import pipeline 29 | 30 | nlp = pipeline('question-answering', model='illuin/camembert-base-fquad', tokenizer='illuin/camembert-base-fquad') 31 | 32 | nlp({ 33 | 'question': "Qui est Claude Monet?", 34 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 35 | }) 36 | ``` 37 | 38 | ## Citation 39 | 40 | If you use our work, please cite: 41 | 42 | ```bibtex 43 | @article{dHoffschmidt2020FQuADFQ, 44 | title={FQuAD: French Question Answering Dataset}, 45 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl'e and Quentin Heinrich}, 46 | journal={ArXiv}, 47 | year={2020}, 48 | volume={abs/2002.06071} 49 | } 50 | ``` 51 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096 3 | [Longformer](https://arxiv.org/abs/2004.05150) is a transformer model for long documents. 4 | 5 | `longformer-base-4096` is a BERT-like model started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096. 6 | 7 | Longformer uses a combination of a sliding window (local) attention and global attention. 
Global attention is user-configured based on the task to allow the model to learn task-specific representations. 8 | Please refer to the examples in `modeling_longformer.py` and the paper for more details on how to set global attention. 9 | 10 | 11 | ### Citing 12 | 13 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 14 | ``` 15 | @article{Beltagy2020Longformer, 16 | title={Longformer: The Long-Document Transformer}, 17 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 18 | journal={arXiv:2004.05150}, 19 | year={2020}, 20 | } 21 | ``` 22 | 23 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 24 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 25 | -------------------------------------------------------------------------------- /src/transformers/commands/transformers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from argparse import ArgumentParser 3 | 4 | from transformers.commands.convert import ConvertCommand 5 | from transformers.commands.download import DownloadCommand 6 | from transformers.commands.env import EnvironmentCommand 7 | from transformers.commands.run import RunCommand 8 | from transformers.commands.serving import ServeCommand 9 | from transformers.commands.user import UserCommands 10 | 11 | 12 | def main(): 13 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli []") 14 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers") 15 | 16 | # Register commands 17 | ConvertCommand.register_subcommand(commands_parser) 18 | DownloadCommand.register_subcommand(commands_parser) 19 | EnvironmentCommand.register_subcommand(commands_parser) 20 | RunCommand.register_subcommand(commands_parser) 21 | ServeCommand.register_subcommand(commands_parser) 22 | UserCommands.register_subcommand(commands_parser) 23 | 24 | # Let's go 25 | args = parser.parse_args() 26 | 27 | if not hasattr(args, "func"): 28 | parser.print_help() 29 | exit(1) 30 | 31 | # Run 32 | service = args.func(args) 33 | service.run() 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-camembert-base/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow CamemBERT 2 | 3 | In this repository you will find different versions of the CamemBERT model for Tensorflow. 4 | 5 | ## CamemBERT 6 | 7 | [CamemBERT](https://camembert-model.fr/) is a state-of-the-art language model for French based on the RoBERTa architecture pretrained on the French subcorpus of the newly available multilingual corpus OSCAR. 
8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-camembert-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/tf_model.h5) 14 | 15 | ## Usage 16 | 17 | With Transformers >= 2.4 the Tensorflow models of CamemBERT can be loaded like: 18 | 19 | ```python 20 | from transformers import TFCamembertModel 21 | 22 | model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base") 23 | ``` 24 | 25 | ## Huggingface model hub 26 | 27 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 28 | 29 | ## Acknowledgments 30 | 31 | Thanks to all the Huggingface team for the support and their amazing library! 32 | -------------------------------------------------------------------------------- /model_cards/google/reformer-crime-and-punishment/README.md: -------------------------------------------------------------------------------- 1 | ## Reformer Model trained on "Crime and Punishment" 2 | 3 | Crime and Punishment is a novel written by Fyodor Dostoevsky and was translated into English. 4 | 5 | Crime and Punishment training data was taken from `gs://trax-ml/reformer/crime-and-punishment-2554.txt` and contains 6 | roughly 0.5M tokens. 7 | 8 | The ReformerLM model was trained in flax using colab notebook proposed by authors: https://colab.research.google.com/github/google/trax/blob/master/trax/models/reformer/text_generation.ipynb and the weights were converted to Hugging Face's PyTorch ReformerLM model `ReformerModelWithLMHead`. 9 | 10 | The model is a language model that operates on small sub-word units. Text can be generated as follows: 11 | 12 | ```python 13 | model = ReformerModelWithLMHead.from_pretrained("patrickvonplaten/reformer-crime-and-punish") 14 | tok = ReformerTokenizer.from_pretrained("patrickvonplaten/reformer-crime-and-punish") 15 | tok.decode(model.generate(tok.encode("A few months later", return_tensors="pt"), do_sample=True,temperature=0.7, max_length=100)[0]) 16 | 17 | # gives:'A few months later on was more than anything in the flat. 18 | # “I have already.” “That’s not my notion that he had forgotten him. 19 | # What does that matter? And why do you mean? 
It’s only another fellow,” he said as he went out, as though he want' 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/test_adapter_fusion_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import FrozenInstanceError 3 | 4 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterFusionConfig 5 | 6 | from .utils import require_torch 7 | 8 | 9 | @require_torch 10 | class AdapterFusionConfigTest(unittest.TestCase): 11 | 12 | config_names = ADAPTERFUSION_CONFIG_MAP.keys() 13 | 14 | def test_config_load(self): 15 | for config_name in self.config_names: 16 | with self.subTest(config_name=config_name): 17 | config = AdapterFusionConfig.load(config_name, temperature=True) 18 | self.assertTrue(isinstance(config, AdapterFusionConfig)) 19 | self.assertEqual(config.temperature, True) 20 | 21 | def test_config_immutable(self): 22 | def set_attr(config: AdapterFusionConfig): 23 | config.temperature = True 24 | 25 | for config in ADAPTERFUSION_CONFIG_MAP.values(): 26 | with self.subTest(config=config.__class__.__name__): 27 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config)) 28 | 29 | def test_custom_attr(self): 30 | for config in ADAPTERFUSION_CONFIG_MAP.values(): 31 | with self.subTest(config=config.__class__.__name__): 32 | config.dummy_attr = "test_value" 33 | self.assertEqual(config.dummy_attr, "test_value") 34 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb-pos/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB-pos 2 | 3 | ## What is it? 4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce positive movie reviews based the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/gpt2-imdb`) via PPO. 5 | 6 | ## Training setting 7 | The model was trained for `100` optimisation steps with a batch size of `256` which corresponds to `25600` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/04-gpt2-sentiment-ppo-training/). 8 | 9 | ## Examples 10 | A few examples of the model response to a query before and after optimisation: 11 | 12 | | query | response (before) | response (after) | rewards (before) | rewards (after) | 13 | |-------|-------------------|------------------|------------------|-----------------| 14 | |I'd never seen a |heavier, woodier example of Victorian archite... |film of this caliber, and I think it's wonder... |3.297736 |4.158653| 15 | |I love John's work |but I actually have to write language as in w... |and I hereby recommend this film. I am really... |-1.904006 |4.159198 | 16 | |I's a big struggle |to see anyone who acts in that way. by Jim Th... |, but overall I'm happy with the changes even ... 
|-1.595925 |2.651260| 17 | 18 | 19 | -------------------------------------------------------------------------------- /model_cards/twmkn9/albert-base-v2-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [ALBERT base v2](https://huggingface.co/albert-base-v2) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type albert 7 | --model_name_or_path albert-base-v2 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/albert_fine/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 78.71010200723923, 30 | 'f1': 81.89228117126069, 31 | 'total': 6078, 32 | 'HasAns_exact': 75.39518900343643, 33 | 'HasAns_f1': 82.04167868004215, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 81.7550505050505, 36 | 'NoAns_f1': 81.7550505050505, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 78.72655478775913, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 81.90873395178066, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . 
" The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /examples/summarization/t5/README.md: -------------------------------------------------------------------------------- 1 | ***This script evaluates the the multitask pre-trained checkpoint for ``t5-base`` (see paper [here](https://arxiv.org/pdf/1910.10683.pdf)) on the CNN/Daily Mail test dataset. Please note that the results in the paper were attained using a model fine-tuned on summarization, so that results will be worse here by approx. 0.5 ROUGE points*** 2 | 3 | ### Get the CNN Data 4 | First, you need to download the CNN data. It's about ~400 MB and can be downloaded by 5 | running 6 | 7 | ```bash 8 | python download_cnn_daily_mail.py cnn_articles_input_data.txt cnn_articles_reference_summaries.txt 9 | ``` 10 | 11 | You should confirm that each file has 11490 lines: 12 | 13 | ```bash 14 | wc -l cnn_articles_input_data.txt # should print 11490 15 | wc -l cnn_articles_reference_summaries.txt # should print 11490 16 | ``` 17 | 18 | ### Generating Summaries 19 | 20 | To create summaries for each article in dataset, run: 21 | ```bash 22 | python evaluate_cnn.py cnn_articles_input_data.txt cnn_generated_articles_summaries.txt cnn_articles_reference_summaries.txt rouge_score.txt 23 | ``` 24 | The default batch size, 8, fits in 16GB GPU memory, but may need to be adjusted to fit your system. 25 | The rouge scores "rouge1, rouge2, rougeL" are automatically created and saved in ``rouge_score.txt``. 26 | 27 | 28 | ### Finetuning 29 | Pass model_type=t5 and model `examples/summarization/bart/finetune.py` 30 | -------------------------------------------------------------------------------- /model_cards/digitalepidemiologylab/covid-twitter-bert/README.md: -------------------------------------------------------------------------------- 1 | # COVID-Twitter-BERT (CT-BERT) 2 | BERT-large-uncased model, pretrained on a corpus of messages from Twitter about COVID-19 3 | 4 | ## Overview 5 | This model was trained on 160M tweets collected between January 12 and April 16, 2020 containing at least one of the keywords "wuhan", "ncov", "coronavirus", "covid", or "sars-cov-2". 
These tweets were filtered and preprocessed to reach a final sample of 22.5M tweets (containing 40.7M sentences and 633M tokens) which were used for training. 6 | 7 | This model was evaluated based on downstream classification tasks, but it could be used for any other NLP task which can leverage contextual embeddings. 8 | 9 | In order to achieve best results, make sure to use the same text preprocessing as we did for pretraining. This involves replacing user mentions, urls and emojis. You can find a script on our projects [GitHub repo](https://github.com/digitalepidemiologylab/covid-twitter-bert). 10 | 11 | ## Example usage 12 | ```python 13 | tokenizer = AutoTokenizer.from_pretrained("digitalepidemiologylab/covid-twitter-bert") 14 | model = TFAutoModel.from_pretrained("digitalepidemiologylab/covid-twitter-bert") 15 | ``` 16 | 17 | ## References 18 | [1] Martin Müller, Marcel Salaté, Per E Kummervold. "COVID-Twitter-BERT: A Natural Language Processing Model to Analyse COVID-19 Content on Twitter" arXiv preprint arXiv:2005.07503 (2020). 19 | -------------------------------------------------------------------------------- /model_cards/twmkn9/bert-base-uncased-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [BERT base uncased](https://huggingface.co/bert-base-uncased) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type bert 7 | --model_name_or_path bert-base-uncased 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/bert_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 72.35932872655479, 30 | 'f1': 75.75355132564763, 31 | 'total': 6078, 32 | 'HasAns_exact': 74.29553264604812, 33 | 'HasAns_f1': 81.38490892002987, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 70.58080808080808, 36 | 'NoAns_f1': 70.58080808080808, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 72.35932872655479, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 75.75355132564766, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! 
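For inference, a minimal question-answering sketch (the hub id `twmkn9/bert-base-uncased-squad2` is inferred from this model card's path; the question and context are made up):

```python
from transformers import pipeline

nlp = pipeline(
    "question-answering",
    model="twmkn9/bert-base-uncased-squad2",
    tokenizer="twmkn9/bert-base-uncased-squad2",
)

result = nlp({
    "question": "What was the model fine-tuned on?",
    "context": "This checkpoint is BERT base uncased fine-tuned on SQuAD v2 for three epochs.",
})
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```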
-------------------------------------------------------------------------------- /src/transformers/commands/download.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from transformers.commands import BaseTransformersCLICommand 4 | 5 | 6 | def download_command_factory(args): 7 | return DownloadCommand(args.model, args.cache_dir, args.force) 8 | 9 | 10 | class DownloadCommand(BaseTransformersCLICommand): 11 | @staticmethod 12 | def register_subcommand(parser: ArgumentParser): 13 | download_parser = parser.add_parser("download") 14 | download_parser.add_argument( 15 | "--cache-dir", type=str, default=None, help="Path to location to store the models" 16 | ) 17 | download_parser.add_argument( 18 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir" 19 | ) 20 | download_parser.add_argument("model", type=str, help="Name of the model to download") 21 | download_parser.set_defaults(func=download_command_factory) 22 | 23 | def __init__(self, model: str, cache: str, force: bool): 24 | self._model = model 25 | self._cache = cache 26 | self._force = force 27 | 28 | def run(self): 29 | from transformers import AutoModel, AutoTokenizer 30 | 31 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 32 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 33 | -------------------------------------------------------------------------------- /model_cards/twmkn9/distilroberta-base-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [Distilroberta base](https://huggingface.co/distilroberta-base) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type robberta 7 | --model_name_or_path distilroberta-base 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/distilroberta_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 70.9279368213228, 30 | 'f1': 74.60439802429168, 31 | 'total': 6078, 32 | 'HasAns_exact': 67.62886597938144, 33 | 'HasAns_f1': 75.30774267754136, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 73.95833333333333, 36 | 'NoAns_f1': 73.95833333333333, 'NoAns_total': 3168, 37 | 'best_exact': 70.94438960184272, 38 | 'best_exact_thresh': 0.0, 39 | 'best_f1': 74.62085080481161, 40 | 'best_f1_thresh': 0.0 41 | } 42 | ``` 43 | 44 | We are hopeful this might save you time, energy, and compute. Cheers! 
-------------------------------------------------------------------------------- /model_cards/fmikaelian/camembert-base-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-fquad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [FQuAD](https://fquad.illuin.tech/)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type camembert \ 16 | --model_name_or_path camembert-base \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file train.json \ 21 | --predict_file valid.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 \ 29 | --save_steps 10000 30 | ``` 31 | 32 | ## Evaluation results 33 | 34 | ```shell 35 | {"f1": 77.24515316052342, "exact_match": 52.82308657465496} 36 | ``` 37 | 38 | ## Usage 39 | 40 | ```python 41 | from transformers import pipeline 42 | 43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-fquad', tokenizer='fmikaelian/camembert-base-fquad') 44 | 45 | nlp({ 46 | 'question': "Qui est Claude Monet?", 47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 48 | }) 49 | ``` -------------------------------------------------------------------------------- /model_cards/julien-c/dummy-unknown/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - ci 4 | --- 5 | 6 | ## Dummy model used for unit testing and CI 7 | 8 | 9 | ```python 10 | import json 11 | import os 12 | from transformers.configuration_roberta import RobertaConfig 13 | from transformers import RobertaForMaskedLM, TFRobertaForMaskedLM 14 | 15 | DIRNAME = "./dummy-unknown" 16 | 17 | 18 | config = RobertaConfig(10, 20, 1, 1, 40) 19 | 20 | model = RobertaForMaskedLM(config) 21 | model.save_pretrained(DIRNAME) 22 | 23 | tf_model = TFRobertaForMaskedLM.from_pretrained(DIRNAME, from_pt=True) 24 | tf_model.save_pretrained(DIRNAME) 25 | 26 | # Tokenizer: 27 | 28 | vocab = [ 29 | "l", 30 | "o", 31 | "w", 32 | "e", 33 | "r", 34 | "s", 35 | "t", 36 | "i", 37 | "d", 38 | "n", 39 | "\u0120", 40 | "\u0120l", 41 | "\u0120n", 42 | "\u0120lo", 43 | "\u0120low", 44 | "er", 45 | "\u0120lowest", 46 | "\u0120newer", 47 | "\u0120wider", 48 | "", 49 | ] 50 | vocab_tokens = dict(zip(vocab, range(len(vocab)))) 51 | merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""] 52 | 53 | vocab_file = os.path.join(DIRNAME, "vocab.json") 54 | merges_file = os.path.join(DIRNAME, "merges.txt") 55 | with open(vocab_file, "w", encoding="utf-8") as fp: 56 | fp.write(json.dumps(vocab_tokens) + "\n") 57 | with open(merges_file, "w", encoding="utf-8") as fp: 58 | fp.write("\n".join(merges)) 59 | ``` 60 | -------------------------------------------------------------------------------- /model_cards/twmkn9/distilbert-base-uncased-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [Distilbert base uncased](https://huggingface.co/distilbert-base-uncased) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 
run_squad.py 6 | --model_type distilbert 7 | --model_name_or_path distilbert-base-uncased 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/distilbert_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 64.88976637051661, 30 | 'f1': 68.1776176526635, 31 | 'total': 6078, 32 | 'HasAns_exact': 69.7594501718213, 33 | 'HasAns_f1': 76.62665295288285, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 60.416666666666664, 36 | 'NoAns_f1': 60.416666666666664, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 64.88976637051661, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 68.17761765266337, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! -------------------------------------------------------------------------------- /docs/source/bertology.rst: -------------------------------------------------------------------------------- 1 | BERTology 2 | --------- 3 | 4 | There is a growing field of study concerned with investigating the inner working of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are: 5 | 6 | 7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950 8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650 9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341 10 | 11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650): 12 | 13 | 14 | * accessing all the hidden-states of BERT/GPT/GPT-2, 15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2, 16 | * retrieving heads output values and gradients to be able to compute head importance score and prune head as explained in https://arxiv.org/abs/1905.10650. 17 | 18 | To help you understand and use these features, we have added a specific example script: `bertology.py `_ while extract information and prune a model pre-trained on GLUE. 19 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-DK_rest/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_rest` is trained from 1G (19 types) restaurants from Yelp. 6 | 7 | ## Model Description 8 | 9 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 10 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 
11 | 12 | 13 | ## Instructions 14 | Loading the post-trained weights are as simple as, e.g., 15 | 16 | ```python 17 | import torch 18 | from transformers import AutoModel, AutoTokenizer 19 | 20 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_rest") 21 | model = AutoModel.from_pretrained("activebus/BERT-DK_rest") 22 | 23 | ``` 24 | 25 | 26 | ## Evaluation Results 27 | 28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 29 | 30 | 31 | ## Citation 32 | If you find this work useful, please cite as following. 33 | ``` 34 | @inproceedings{xu_bert2019, 35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 38 | month = "jun", 39 | year = "2019", 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /examples/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ 2 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 3 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ 4 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 5 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ 6 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 7 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py" 8 | export MAX_LENGTH=128 9 | export BERT_MODEL=bert-base-multilingual-cased 10 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 11 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 12 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 13 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 14 | export OUTPUT_DIR=germeval-model 15 | export BATCH_SIZE=32 16 | export NUM_EPOCHS=3 17 | export SAVE_STEPS=750 18 | export SEED=1 19 | 20 | python3 run_ner.py \ 21 | --data_dir . \ 22 | --labels ./labels.txt \ 23 | --model_name_or_path $BERT_MODEL \ 24 | --output_dir $OUTPUT_DIR \ 25 | --max_seq_length $MAX_LENGTH \ 26 | --num_train_epochs $NUM_EPOCHS \ 27 | --per_gpu_train_batch_size $BATCH_SIZE \ 28 | --save_steps $SAVE_STEPS \ 29 | --seed $SEED \ 30 | --do_train \ 31 | --do_eval \ 32 | --do_predict 33 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-PT_rest/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_rest` is trained from 1G (19 types) restaurants from Yelp. 6 | `BERT-PT_*` addtionally uses SQuAD 1.1. 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 
12 | 13 | 14 | ## Instructions 15 | Loading the post-trained weights are as simple as, e.g., 16 | 17 | ```python 18 | import torch 19 | from transformers import AutoModel, AutoTokenizer 20 | 21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_rest") 22 | model = AutoModel.from_pretrained("activebus/BERT-PT_rest") 23 | 24 | ``` 25 | 26 | 27 | ## Evaluation Results 28 | 29 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 30 | 31 | 32 | ## Citation 33 | If you find this work useful, please cite as following. 34 | ``` 35 | @inproceedings{xu_bert2019, 36 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 37 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 38 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 39 | month = "jun", 40 | year = "2019", 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/summarization/t5/test_t5_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | 8 | from .evaluate_cnn import run_generate 9 | 10 | 11 | output_file_name = "output_t5_sum.txt" 12 | score_file_name = "score_t5_sum.txt" 13 | 14 | articles = ["New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County."] 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | logger = logging.getLogger() 19 | 20 | 21 | class TestT5Examples(unittest.TestCase): 22 | def test_t5_cli(self): 23 | stream_handler = logging.StreamHandler(sys.stdout) 24 | logger.addHandler(stream_handler) 25 | tmp = Path(tempfile.gettempdir()) / "utest_generations_t5_sum.hypo" 26 | with tmp.open("w") as f: 27 | f.write("\n".join(articles)) 28 | 29 | output_file_name = Path(tempfile.gettempdir()) / "utest_output_t5_sum.hypo" 30 | score_file_name = Path(tempfile.gettempdir()) / "utest_score_t5_sum.hypo" 31 | 32 | testargs = [ 33 | "evaluate_cnn.py", 34 | "patrickvonplaten/t5-tiny-random", 35 | str(tmp), 36 | str(output_file_name), 37 | str(tmp), 38 | str(score_file_name), 39 | ] 40 | 41 | with patch.object(sys, "argv", testargs): 42 | run_generate() 43 | self.assertTrue(Path(output_file_name).exists()) 44 | self.assertTrue(Path(score_file_name).exists()) 45 | -------------------------------------------------------------------------------- /tests/test_adapter_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import FrozenInstanceError 3 | 4 | from transformers import ADAPTER_CONFIG_MAP, AdapterConfig 5 | 6 | from .utils import require_torch 7 | 8 | 9 | @require_torch 10 | class AdapterConfigTest(unittest.TestCase): 11 | 12 | config_names = ["pfeiffer", "houlsby"] 13 | 14 | def test_config_load(self): 15 | download_kwargs = {"force_download": True} 16 | for config_name in self.config_names: 17 | with self.subTest(config_name=config_name): 18 | config = AdapterConfig.load(config_name, download_kwargs=download_kwargs, non_linearity="leakyrelu") 19 | self.assertTrue(isinstance(config, AdapterConfig)) 20 | self.assertEqual(config.non_linearity, "leakyrelu") 21 | 22 | def test_config_immutable(self): 23 | def set_attr(config: AdapterConfig): 24 | config.ln_before = True 25 | 26 | for config in 
ADAPTER_CONFIG_MAP.values(): 27 | with self.subTest(config=config.__class__.__name__): 28 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config)) 29 | 30 | def test_custom_attr(self): 31 | for config in ADAPTER_CONFIG_MAP.values(): 32 | with self.subTest(config=config.__class__.__name__): 33 | # create a copy to leave original untouched 34 | config = config.replace() 35 | config.dummy_attr = "test_value" 36 | self.assertEqual(config.dummy_attr, "test_value") 37 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-PT_laptop/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 6 | `BERT-PT_*` addtionally uses SQuAD 1.1. 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 12 | 13 | 14 | ## Instructions 15 | Loading the post-trained weights are as simple as, e.g., 16 | 17 | ```python 18 | import torch 19 | from transformers import AutoModel, AutoTokenizer 20 | 21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_laptop") 22 | model = AutoModel.from_pretrained("activebus/BERT-PT_laptop") 23 | 24 | ``` 25 | 26 | ## Evaluation Results 27 | 28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 29 | 30 | 31 | ## Citation 32 | If you find this work useful, please cite as following. 
33 | ``` 34 | @inproceedings{xu_bert2019, 35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 38 | month = "jun", 39 | year = "2019", 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /model_cards/fmikaelian/camembert-base-squad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-squad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [french-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type camembert \ 16 | --model_name_or_path camembert-base \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \ 21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output3 \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 \ 29 | --save_steps 10000 30 | ``` 31 | 32 | ## Evaluation results 33 | 34 | ```shell 35 | {"f1": 79.8570684959745, "exact_match": 59.21327108373895} 36 | ``` 37 | 38 | ## Usage 39 | 40 | ```python 41 | from transformers import pipeline 42 | 43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-squad', tokenizer='fmikaelian/camembert-base-squad') 44 | 45 | nlp({ 46 | 'question': "Qui est Claude Monet?", 47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 48 | }) 49 | ``` -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-base-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Base Generator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-generator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-generator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. 
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForMaskedLM 34 | 35 | ```python 36 | from transformers import pipeline 37 | 38 | fill_mask = pipeline( 39 | "fill-mask", 40 | model="monologg/koelectra-base-generator", 41 | tokenizer="monologg/koelectra-base-generator" 42 | ) 43 | 44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token))) 45 | ``` 46 | -------------------------------------------------------------------------------- /model_cards/julien-c/EsperBERTo-small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: esperanto 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png 4 | --- 5 | 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto 7 | 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥 9 | 10 | ## Training Details 11 | 12 | - current checkpoint: 566000 13 | - machine name: `galinette` 14 | 15 | 16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png) 17 | 18 | ## Example pipeline 19 | 20 | ```python 21 | from transformers import pipeline 22 | 23 | fill_mask = pipeline( 24 | "fill-mask", 25 | model="julien-c/EsperBERTo-small", 26 | tokenizer="julien-c/EsperBERTo-small" 27 | ) 28 | 29 | fill_mask("Jen la komenco de bela .") 30 | 31 | # This is the beginning of a beautiful . 32 | # => 33 | 34 | # { 35 | # 'score':0.06502299010753632 36 | # 'sequence':' Jen la komenco de bela vivo.' 37 | # 'token':1099 38 | # } 39 | # { 40 | # 'score':0.0421181358397007 41 | # 'sequence':' Jen la komenco de bela vespero.' 42 | # 'token':5100 43 | # } 44 | # { 45 | # 'score':0.024884626269340515 46 | # 'sequence':' Jen la komenco de bela laboro.' 47 | # 'token':1570 48 | # } 49 | # { 50 | # 'score':0.02324388362467289 51 | # 'sequence':' Jen la komenco de bela tago.' 52 | # 'token':1688 53 | # } 54 | # { 55 | # 'score':0.020378097891807556 56 | # 'sequence':' Jen la komenco de bela festo.' 57 | # 'token':4580 58 | # } 59 | ``` 60 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-small-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Small Generator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-generator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-generator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. 
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForMaskedLM 34 | 35 | ```python 36 | from transformers import pipeline 37 | 38 | fill_mask = pipeline( 39 | "fill-mask", 40 | model="monologg/koelectra-small-generator", 41 | tokenizer="monologg/koelectra-small-generator" 42 | ) 43 | 44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token))) 45 | ``` 46 | -------------------------------------------------------------------------------- /model_cards/fmikaelian/flaubert-base-uncased-squad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # flaubert-base-uncased-squad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([flaubert](https://github.com/getalp/Flaubert) model fine-tuned on [french-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type flaubert \ 16 | --model_name_or_path flaubert-base-uncased \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \ 21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 29 | ``` 30 | 31 | ## Evaluation results 32 | 33 | ```shell 34 | {"f1": 68.66174806561969, "exact_match": 49.299692063176714} 35 | ``` 36 | 37 | ## Usage 38 | 39 | ```python 40 | from transformers import pipeline 41 | 42 | nlp = pipeline('question-answering', model='fmikaelian/flaubert-base-uncased-squad', tokenizer='fmikaelian/flaubert-base-uncased-squad') 43 | 44 | nlp({ 45 | 'question': "Qui est Claude Monet?", 46 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 47 | }) 48 | ``` -------------------------------------------------------------------------------- /model_cards/ixa-ehu/berteus-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - basque 4 | --- 5 | 6 | # BERTeus base cased 7 | 8 | This is the Basque language pretrained model presented in [Give your Text Representation Models some Love: the Case for Basque](https://arxiv.org/pdf/2004.00033.pdf). This model has been trained on a Basque corpus comprising Basque crawled news articles from online newspapers and the Basque Wikipedia. The training corpus contains 224.6 million tokens, of which 35 million come from the Wikipedia. 9 | 10 | BERTeus has been tested on four different downstream tasks for Basque: part-of-speech (POS) tagging, named entity recognition (NER), sentiment analysis and topic classification; improving the state of the art for all tasks. 
See summary of results below: 11 | 12 | 13 | | Downstream task | BERTeus | mBERT | Previous SOTA | 14 | | --------------- | ------- | ------| ------------- | 15 | | Topic Classification | **76.77** | 68.42 | 63.00 | 16 | | Sentiment | **78.10** | 71.02 | 74.02 | 17 | | POS | **97.76** | 96.37 | 96.10 | 18 | | NER | **87.06** | 81.52 | 76.72 | 19 | 20 | 21 | If using this model, please cite the following paper: 22 | ``` 23 | @inproceedings{agerri2020give, 24 | title={Give your Text Representation Models some Love: the Case for Basque}, 25 | author={Rodrigo Agerri and I{\~n}aki San Vicente and Jon Ander Campos and Ander Barrena and Xabier Saralegi and Aitor Soroa and Eneko Agirre}, 26 | booktitle={Proceedings of the 12th International Conference on Language Resources and Evaluation}, 27 | year={2020} 28 | } 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/adversarial/README.md: -------------------------------------------------------------------------------- 1 | ## Adversarial evaluation of model performances 2 | 3 | Here is an example on evaluating a model using adversarial evaluation of natural language inference with the Heuristic Analysis for NLI Systems (HANS) dataset [McCoy et al., 2019](https://arxiv.org/abs/1902.01007). The example was gracefully provided by [Nafise Sadat Moosavi](https://github.com/ns-moosavi). 4 | 5 | The HANS dataset can be downloaded from [this location](https://github.com/tommccoy1/hans). 6 | 7 | This is an example of using test_hans.py: 8 | 9 | ```bash 10 | export HANS_DIR=path-to-hans 11 | export MODEL_TYPE=type-of-the-model-e.g.-bert-roberta-xlnet-etc 12 | export MODEL_PATH=path-to-the-model-directory-that-is-trained-on-NLI-e.g.-by-using-run_glue.py 13 | 14 | python examples/hans/test_hans.py \ 15 | --task_name hans \ 16 | --model_type $MODEL_TYPE \ 17 | --do_eval \ 18 | --data_dir $HANS_DIR \ 19 | --model_name_or_path $MODEL_PATH \ 20 | --max_seq_length 128 \ 21 | --output_dir $MODEL_PATH \ 22 | ``` 23 | 24 | This will create the hans_predictions.txt file in MODEL_PATH, which can then be evaluated using hans/evaluate_heur_output.py from the HANS dataset. 25 | 26 | The results of the BERT-base model that is trained on MNLI using batch size 8 and the random seed 42 on the HANS dataset is as follows: 27 | 28 | ```bash 29 | Heuristic entailed results: 30 | lexical_overlap: 0.9702 31 | subsequence: 0.9942 32 | constituent: 0.9962 33 | 34 | Heuristic non-entailed results: 35 | lexical_overlap: 0.199 36 | subsequence: 0.0396 37 | constituent: 0.118 38 | ``` 39 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-DK_laptop/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 6 | 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 12 | 13 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 
14 | 15 | ## Instructions 16 | Loading the post-trained weights are as simple as, e.g., 17 | 18 | ```python 19 | import torch 20 | from transformers import AutoModel, AutoTokenizer 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_laptop") 23 | model = AutoModel.from_pretrained("activebus/BERT-DK_laptop") 24 | 25 | ``` 26 | 27 | 28 | ## Evaluation Results 29 | 30 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 31 | 32 | 33 | ## Citation 34 | If you find this work useful, please cite as following. 35 | ``` 36 | @inproceedings{xu_bert2019, 37 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 38 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 39 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 40 | month = "jun", 41 | year = "2019", 42 | } 43 | ``` 44 | -------------------------------------------------------------------------------- /adapter_docs/classes/roberta.rst: -------------------------------------------------------------------------------- 1 | RoBERTa 2 | ======== 3 | 4 | The RoBERTa model was proposed in `RoBERTa: A Robustly Optimized BERT Pretraining Approach `_ 5 | by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, 6 | Veselin Stoyanov. It is based on Google's BERT model released in 2018. 7 | 8 | .. note:: 9 | This class is nearly identical to the PyTorch implementation of RoBERTa in Huggingface Transformers. 10 | For more information, visit `the corresponding section in their documentation `_. 11 | 12 | RobertaConfig 13 | ~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.RobertaConfig 16 | :members: 17 | 18 | 19 | RobertaTokenizer 20 | ~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.RobertaTokenizer 23 | :members: build_inputs_with_special_tokens, get_special_tokens_mask, 24 | create_token_type_ids_from_sequences, save_vocabulary 25 | 26 | 27 | RobertaModel 28 | ~~~~~~~~~~~~~~~~~~~~ 29 | 30 | .. autoclass:: transformers.RobertaModel 31 | :members: 32 | 33 | 34 | RobertaForMaskedLM 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | .. autoclass:: transformers.RobertaForMaskedLM 38 | :members: 39 | 40 | 41 | RobertaForSequenceClassification 42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | .. autoclass:: transformers.RobertaForSequenceClassification 45 | :members: 46 | 47 | 48 | RobertaForTokenClassification 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | .. 
autoclass:: transformers.RobertaForTokenClassification 52 | :members: 53 | -------------------------------------------------------------------------------- /model_cards/ahotrod/roberta_large_squad2/README.md: -------------------------------------------------------------------------------- 1 | ## RoBERTa-large language model fine-tuned on SQuAD2.0 2 | 3 | ### with the following results: 4 | 5 | ``` 6 | "exact": 84.46896319380106, 7 | "f1": 87.85388093408943, 8 | "total": 11873, 9 | "HasAns_exact": 81.37651821862349, 10 | "HasAns_f1": 88.1560607844881, 11 | "HasAns_total": 5928, 12 | "NoAns_exact": 87.55256518082422, 13 | "NoAns_f1": 87.55256518082422, 14 | "NoAns_total": 5945, 15 | "best_exact": 84.46896319380106, 16 | "best_exact_thresh": 0.0, 17 | "best_f1": 87.85388093408929, 18 | "best_f1_thresh": 0.0 19 | ``` 20 | ### from script: 21 | ``` 22 | python ${EXAMPLES}/run_squad.py \ 23 | --model_type roberta \ 24 | --model_name_or_path roberta-large \ 25 | --do_train \ 26 | --do_eval \ 27 | --train_file ${SQUAD}/train-v2.0.json \ 28 | --predict_file ${SQUAD}/dev-v2.0.json \ 29 | --version_2_with_negative \ 30 | --do_lower_case \ 31 | --num_train_epochs 3 \ 32 | --warmup_steps 1642 \ 33 | --weight_decay 0.01 \ 34 | --learning_rate 3e-5 \ 35 | --adam_epsilon 1e-6 \ 36 | --max_seq_length 512 \ 37 | --doc_stride 128 \ 38 | --per_gpu_train_batch_size 8 \ 39 | --gradient_accumulation_steps 6 \ 40 | --per_gpu_eval_batch_size 48 \ 41 | --threads 12 \ 42 | --logging_steps 50 \ 43 | --save_steps 2000 \ 44 | --overwrite_output_dir \ 45 | --output_dir ${MODEL_PATH} 46 | $@ 47 | ``` 48 | ### using the following system & software: 49 | ``` 50 | Transformers: 2.7.0 51 | PyTorch: 1.4.0 52 | TensorFlow: 2.1.0 53 | Python: 3.7.7 54 | OS/Platform: Linux-5.3.0-46-generic-x86_64-with-debian-buster-sid 55 | CPU/GPU: Intel i9-9900K / NVIDIA Titan RTX 24GB 56 | ``` 57 | -------------------------------------------------------------------------------- /model_cards/illuin/camembert-large-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-large-fquad 6 | 7 | ## Description 8 | 9 | A native French Question Answering model [CamemBERT-large](https://camembert-model.fr/) fine-tuned on [FQuAD](https://fquad.illuin.tech/). 10 | 11 | ## FQuAD Leaderboard and evaluation scores 12 | 13 | The results of Camembert-large-fquad can be compared with other state-of-the-art models of the [FQuAD Leaderboard](https://illuin-tech.github.io/FQuAD-explorer/). 14 | 15 | On the test set the model scores, 16 | 17 | ```shell 18 | {"f1": 91.5, "exact_match": 82.0} 19 | ``` 20 | 21 | On the development set the model scores, 22 | 23 | ```shell 24 | {"f1": 91.0, "exact_match": 81.2} 25 | ``` 26 | 27 | Note : You can also explore the results of the model on [FQuAD-Explorer](https://illuin-tech.github.io/FQuAD-explorer/) ! 28 | 29 | ## Usage 30 | 31 | ```python 32 | from transformers import pipeline 33 | 34 | nlp = pipeline('question-answering', model='illuin/camembert-large-fquad', tokenizer='illuin/camembert-large-fquad') 35 | 36 | nlp({ 37 | 'question': "Qui est Claude Monet?", 38 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 
39 | }) 40 | ``` 41 | 42 | ## Citation 43 | 44 | If you use our work, please cite: 45 | 46 | ```bibtex 47 | @article{dHoffschmidt2020FQuADFQ, 48 | title={FQuAD: French Question Answering Dataset}, 49 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl'e and Quentin Heinrich}, 50 | journal={ArXiv}, 51 | year={2020}, 52 | volume={abs/2002.06071} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /src/transformers/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", 28 | "umberto-commoncrawl-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-commoncrawl-cased-v1/config.json", 29 | "umberto-wikipedia-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-wikipedia-uncased-v1/config.json", 30 | } 31 | 32 | 33 | class CamembertConfig(RobertaConfig): 34 | """ 35 | This class overrides :class:`~transformers.RobertaConfig`. Please check the 36 | superclass for the appropriate documentation alongside usage examples. 37 | """ 38 | 39 | model_type = "camembert" 40 | -------------------------------------------------------------------------------- /src/transformers/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ MMBT configuration """ 17 | 18 | 19 | import logging 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MMBTConfig(object): 26 | """Configuration class to store the configuration of a `MMBT Model`. 27 | 28 | Args: 29 | config (:obj:`~transformers.PreTrainedConfig`): 30 | Config of the underlying Transformer models. 
Its values are 31 | copied over to use a single config. 32 | num_labels (:obj:`int` or :obj:`None`, optional, defaults to `None`): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (:obj:`int`, optional, defautls to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve adapter-transformers 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🐛 Bug 11 | 12 | ## Information 13 | 14 | Model I am using (Bert, XLNet ...): 15 | 16 | Language I am using the model on (English, Chinese ...): 17 | 18 | Adapter setup I am using (if any): 19 | 20 | The problem arises when using: 21 | * [ ] the official example scripts: (give details below) 22 | * [ ] my own modified scripts: (give details below) 23 | 24 | The tasks I am working on is: 25 | * [ ] an official GLUE/SQUaD task: (give the name) 26 | * [ ] my own task or dataset: (give details below) 27 | 28 | ## To reproduce 29 | 30 | Steps to reproduce the behavior: 31 | 32 | 1. 33 | 2. 34 | 3. 35 | 36 | 39 | 40 | ## Expected behavior 41 | 42 | 43 | 44 | ## Environment info 45 | 47 | 48 | - `transformers` version: 49 | - Platform: 50 | - Python version: 51 | - PyTorch version (GPU?): 52 | - Tensorflow version (GPU?): 53 | - Using GPU in script?: 54 | - Using distributed or parallel set-up in script?: 55 | -------------------------------------------------------------------------------- /examples/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | ## Multiple Choice 2 | 3 | Based on the script [`run_multiple_choice.py`](). 
4 | 5 | #### Fine-tuning on SWAG 6 | Download [swag](https://github.com/rowanz/swagaf/tree/master/data) data 7 | 8 | ```bash 9 | #training on 4 tesla V100(16GB) GPUS 10 | export SWAG_DIR=/path/to/swag_data_dir 11 | python ./examples/multiple-choice/run_multiple_choice.py \ 12 | --task_name swag \ 13 | --model_name_or_path roberta-base \ 14 | --do_train \ 15 | --do_eval \ 16 | --data_dir $SWAG_DIR \ 17 | --learning_rate 5e-5 \ 18 | --num_train_epochs 3 \ 19 | --max_seq_length 80 \ 20 | --output_dir models_bert/swag_base \ 21 | --per_gpu_eval_batch_size=16 \ 22 | --per_device_train_batch_size=16 \ 23 | --gradient_accumulation_steps 2 \ 24 | --overwrite_output 25 | ``` 26 | Training with the defined hyper-parameters yields the following results: 27 | ``` 28 | ***** Eval results ***** 29 | eval_acc = 0.8338998300509847 30 | eval_loss = 0.44457291918821606 31 | ``` 32 | 33 | 34 | ## Tensorflow 35 | 36 | ```bash 37 | export SWAG_DIR=/path/to/swag_data_dir 38 | python ./examples/multiple-choice/run_tf_multiple_choice.py \ 39 | --task_name swag \ 40 | --model_name_or_path bert-base-cased \ 41 | --do_train \ 42 | --do_eval \ 43 | --data_dir $SWAG_DIR \ 44 | --learning_rate 5e-5 \ 45 | --num_train_epochs 3 \ 46 | --max_seq_length 80 \ 47 | --output_dir models_bert/swag_base \ 48 | --per_gpu_eval_batch_size=16 \ 49 | --per_device_train_batch_size=16 \ 50 | --logging-dir logs \ 51 | --gradient_accumulation_steps 2 \ 52 | --overwrite_output 53 | ``` 54 | 55 | # Run it in colab 56 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb) 57 | -------------------------------------------------------------------------------- /tests/test_tokenization_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import unittest 18 | 19 | from transformers import PreTrainedTokenizer 20 | from transformers.tokenization_gpt2 import GPT2Tokenizer 21 | 22 | from .utils import slow 23 | 24 | 25 | class TokenizerUtilsTest(unittest.TestCase): 26 | def check_tokenizer_from_pretrained(self, tokenizer_class): 27 | s3_models = list(tokenizer_class.max_model_input_sizes.keys()) 28 | for model_name in s3_models[:1]: 29 | tokenizer = tokenizer_class.from_pretrained(model_name) 30 | self.assertIsNotNone(tokenizer) 31 | self.assertIsInstance(tokenizer, tokenizer_class) 32 | self.assertIsInstance(tokenizer, PreTrainedTokenizer) 33 | 34 | for special_tok in tokenizer.all_special_tokens: 35 | self.assertIsInstance(special_tok, str) 36 | special_tok_id = tokenizer.convert_tokens_to_ids(special_tok) 37 | self.assertIsInstance(special_tok_id, int) 38 | 39 | @slow 40 | def test_pretrained_tokenizers(self): 41 | self.check_tokenizer_from_pretrained(GPT2Tokenizer) 42 | -------------------------------------------------------------------------------- /model_cards/valhalla/t5-base-squad/README.md: -------------------------------------------------------------------------------- 1 | # T5 for question-answering 2 | This is T5-base model fine-tuned on SQuAD1.1 for QA using text-to-text approach 3 | 4 | ## Model training 5 | This model was trained on colab TPU with 35GB RAM for 4 epochs 6 | 7 | ## Results: 8 | | Metric | #Value | 9 | |-------------|---------| 10 | | Exact Match | 81.5610 | 11 | | F1 | 89.9601 | 12 | 13 | ## Model in Action 🚀 14 | ``` 15 | from transformers import AutoModelWithLMHead, AutoTokenizer 16 | 17 | tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-squad") 18 | model = AutoModelWithLMHead.from_pretrained("valhalla/t5-base-squad") 19 | 20 | def get_answer(question, context): 21 | input_text = "question: %s context: %s " % (question, context) 22 | features = tokenizer.batch_encode_plus([input_text], return_tensors='pt') 23 | 24 | out = model.generate(input_ids=features['input_ids'], 25 | attention_mask=features['attention_mask']) 26 | 27 | return tokenizer.decode(out[0]) 28 | 29 | context = "In Norse mythology, Valhalla is a majestic, enormous hall located in Asgard, ruled over by the god Odin." 30 | question = "What is Valhalla ?" 31 | 32 | get_answer(question, context) 33 | # output: 'a majestic, enormous hall located in Asgard, ruled over by the god Odin' 34 | ``` 35 | Play with this model [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1a5xpJiUjZybfU9Mi-aDkOp116PZ9-wni?usp=sharing) 36 | 37 | > Created by Suraj Patil [![Github icon](https://cdn0.iconfinder.com/data/icons/octicons/1024/mark-github-32.png)](https://github.com/patil-suraj/) 38 | [![Twitter icon](https://cdn0.iconfinder.com/data/icons/shift-logotypes/32/Twitter-32.png)](https://twitter.com/psuraj28) 39 | -------------------------------------------------------------------------------- /model_cards/Tereveni-AI/gpt2-124M-uk-fiction/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ukrainian 3 | --- 4 | 5 | Note: **default code snippet above won't work** because we are using `AlbertTokenizer` with `GPT2LMHeadModel`, see [issue](https://github.com/huggingface/transformers/issues/4285). 6 | 7 | ## GPT2 124M Trained on Ukranian Fiction 8 | 9 | ### Training details 10 | 11 | Model was trained on corpus of 4040 fiction books, 2.77 GiB in total. 
12 | Evaluation on [brown-uk](https://github.com/brown-uk/corpus) gives perplexity of 50.16. 13 | 14 | ### Example usage: 15 | ```python 16 | from transformers import AlbertTokenizer, GPT2LMHeadModel 17 | 18 | tokenizer = AlbertTokenizer.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction") 19 | model = GPT2LMHeadModel.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction") 20 | 21 | input_ids = tokenizer.encode("Но зла Юнона, суча дочка,", add_special_tokens=False, return_tensors='pt') 22 | 23 | outputs = model.generate( 24 | input_ids, 25 | do_sample=True, 26 | num_return_sequences=3, 27 | max_length=50 28 | ) 29 | 30 | for i, out in enumerate(outputs): 31 | print("{}: {}".format(i, tokenizer.decode(out))) 32 | ``` 33 | 34 | Prints something like this: 35 | ```bash 36 | 0: Но зла Юнона, суча дочка, яка затьмарила всі її таємниці: І хто з'їсть її душу, той помре». І, не дочекавшись гніву богів, посунула в пітьму, щоб не бачити перед собою. Але, за 37 | 1: Но зла Юнона, суча дочка, і довела мене до божевілля. Але він не знав нічого. Після того як я його побачив, мені стало зле. Я втратив рівновагу. Але в мене не було часу на роздуми. Я вже втратив надію 38 | 2: Но зла Юнона, суча дочка, не нарікала нам! — раптом вигукнула Юнона. — Це ти, старий йолопе! — мовила вона, не перестаючи сміятись. — Хіба ти не знаєш, що мені подобається ходити з тобою? 39 | ``` -------------------------------------------------------------------------------- /src/transformers/activations.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def swish(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | def _gelu_python(x): 16 | """ Original Implementation of the gelu activation function in Google Bert repo when initially created. 17 | For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 18 | 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 19 | This is now written in C in torch.nn.functional 20 | Also see https://arxiv.org/abs/1606.08415 21 | """ 22 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 23 | 24 | 25 | def gelu_new(x): 26 | """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). 
27 | Also see https://arxiv.org/abs/1606.08415 28 | """ 29 | return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) 30 | 31 | 32 | if torch.__version__ < "1.4.0": 33 | gelu = _gelu_python 34 | else: 35 | gelu = F.gelu 36 | 37 | 38 | def gelu_fast(x): 39 | return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x))) 40 | 41 | 42 | ACT2FN = { 43 | "relu": F.relu, 44 | "swish": swish, 45 | "gelu": gelu, 46 | "tanh": torch.tanh, 47 | "gelu_new": gelu_new, 48 | "gelu_fast": gelu_fast, 49 | } 50 | 51 | 52 | def get_activation(activation_string): 53 | if activation_string in ACT2FN: 54 | return ACT2FN[activation_string] 55 | else: 56 | raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) 57 | -------------------------------------------------------------------------------- /.github/workflows/tests_torch.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ 'master' ] 6 | paths: 7 | - 'src/**' 8 | - 'examples/**' 9 | - 'templates/**' 10 | - 'tests/**' 11 | - 'utils/**' 12 | pull_request: 13 | branches: [ 'master' ] 14 | paths: 15 | - 'src/**' 16 | - 'examples/**' 17 | - 'templates/**' 18 | - 'tests/**' 19 | - 'utils/**' 20 | 21 | jobs: 22 | check_code_quality: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v2 26 | - uses: actions/setup-python@v2 27 | with: 28 | python-version: 3.6 29 | - uses: actions/cache@v2 30 | with: 31 | path: ~/.cache/pip 32 | key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} 33 | restore-keys: | 34 | ${{ runner.os }}-pip- 35 | - name: Install 36 | run: | 37 | pip install .[torch,quality] 38 | pip uninstall isort -y 39 | pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort 40 | - name: Check Quality 41 | run: | 42 | make quality 43 | run_reduced_tests_torch: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - uses: actions/setup-python@v2 48 | with: 49 | python-version: 3.6 50 | - uses: actions/cache@v2 51 | with: 52 | path: ~/.cache/pip 53 | key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} 54 | restore-keys: | 55 | ${{ runner.os }}-pip- 56 | - name: Install 57 | run: | 58 | pip install .[sklearn,torch,testing] 59 | - name: Test 60 | run: | 61 | make test-reduced 62 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb-ctrl/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB-ctrl 2 | 3 | ## What is it? 4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce controlled movie reviews based the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/bert-imdb`) via PPO. 5 | 6 | ## Training setting 7 | The model was trained for `200` optimisation steps with a batch size of `256` which corresponds to `51200` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/05-gpt2-sentiment-ppo-training/). The strings `"[negative]"`, `"[neutral]"`, and `"[positive]"` are added at the beginning of the query to control the sentiment. 
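## Usage

A minimal generation sketch (not part of the original card): it assumes the checkpoint loads with the standard GPT-2 classes and that one of the control strings described above is simply prepended to the prompt.

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-imdb-ctrl")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-imdb-ctrl")

# Prepend a control string ("[negative]", "[neutral]" or "[positive]") to steer the sentiment.
query = "[positive] I watched this movie when"
input_ids = tokenizer.encode(query, return_tensors="pt")

output = model.generate(input_ids, max_length=40, do_sample=True, top_k=50)
print(tokenizer.decode(output[0]))
```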
8 | 9 | ## Examples 10 | A few examples of the model response to a query before and after optimisation: 11 | 12 | | query | response [negative] | rewards [negative] | response [neutral] | rewards [neutral] | response [positive] | rewards [positive] | 13 | |-------|---------------------|--------------------|--------------------|-------------------|---------------------|--------------------| 14 | |I watched this movie when|it was released and was awful. Little bit of ...|3.130034|it was released and it was the first movie I ...|-1.351991|I was younger it was wonderful. The new play ...|4.232218| 15 | |I can remember seeing this|movie in 2008, and I was so disappointed...yo...|3.428725|in support groups, which I think was not as i...|0.213288|movie, and it is one of my favorite movies ev...|4.168838| 16 | |This 1970 hit film has|little resonance. This movie is bad, not only...|4.241872|a bit of Rocket power.783287. It can be easil...|0.849278|the best formula for comedy and is't just jus...|4.208804| 17 | 18 | 19 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_base 7 | 8 | This a albert_chinese_base model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_base model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_base 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_base' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.36333346366882324` 45 | -------------------------------------------------------------------------------- /docs/source/model_doc/auto.rst: -------------------------------------------------------------------------------- 1 | AutoModels 2 | ----------- 3 | 4 | In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you are supplying to the ``from_pretrained`` method. 
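For instance, the following short sketch (illustrative only) resolves the configuration, tokenizer and model classes from a checkpoint name:

.. code-block:: python

    from transformers import AutoConfig, AutoTokenizer, AutoModel

    # The BERT architecture is inferred from the checkpoint name.
    config = AutoConfig.from_pretrained("bert-base-cased")
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    model = AutoModel.from_pretrained("bert-base-cased")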
5 | 6 | AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path to the pretrained weights/config/vocabulary: 7 | 8 | Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create a class of the relevant architecture (ex: ``model = AutoModel.from_pretrained('bert-base-cased')`` will create a instance of ``BertModel``). 9 | 10 | 11 | ``AutoConfig`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.AutoConfig 15 | :members: 16 | 17 | 18 | ``AutoTokenizer`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.AutoTokenizer 22 | :members: 23 | 24 | 25 | ``AutoModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.AutoModel 29 | :members: 30 | 31 | 32 | ``AutoModelForPreTraining`` 33 | ~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.AutoModelForPreTraining 36 | :members: 37 | 38 | 39 | ``AutoModelWithLMHead`` 40 | ~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.AutoModelWithLMHead 43 | :members: 44 | 45 | 46 | ``AutoModelForSequenceClassification`` 47 | ~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.AutoModelForSequenceClassification 50 | :members: 51 | 52 | 53 | ``AutoModelForQuestionAnswering`` 54 | ~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.AutoModelForQuestionAnswering 57 | :members: 58 | 59 | 60 | ``AutoModelForTokenClassification`` 61 | ~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autoclass:: transformers.AutoModelForTokenClassification 64 | :members: 65 | 66 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_large 7 | 8 | This a albert_chinese_large model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_large model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_large 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_large' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.9422469735145569` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_xlarge/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_xlarge 7 | 8 | This a albert_chinese_xlarge model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_xlarge model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_xlarge 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_xlarge' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.9942440390586853` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_xxlarge/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_xxlarge 7 | 8 | This a albert_chinese_xxlarge model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_xxlarge model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_xxlarge 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_xxlarge' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.995713472366333` 45 | -------------------------------------------------------------------------------- /examples/translation/t5/test_t5_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | 8 | from .evaluate_wmt import run_generate 9 | 10 | 11 | text = ["When Liana Barrientos was 23 years old, she got married in Westchester County."] 12 | translation = ["Als Liana Barrientos 23 Jahre alt war, heiratete sie in Westchester County."] 13 | 14 | output_file_name = "output_t5_trans.txt" 15 | score_file_name = "score_t5_trans.txt" 16 | 17 | logging.basicConfig(level=logging.DEBUG) 18 | 19 | logger = logging.getLogger() 20 | 21 | 22 | class TestT5Examples(unittest.TestCase): 23 | def test_t5_cli(self): 24 | stream_handler = logging.StreamHandler(sys.stdout) 25 | logger.addHandler(stream_handler) 26 | 27 | tmp_source = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.hypo" 28 | with tmp_source.open("w") as f: 29 | f.write("\n".join(text)) 30 | 31 | tmp_target = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.target" 32 | with tmp_target.open("w") as f: 33 | f.write("\n".join(translation)) 34 | 35 | output_file_name = Path(tempfile.gettempdir()) / "utest_output_trans.hypo" 36 | score_file_name = Path(tempfile.gettempdir()) / "utest_score.hypo" 37 | 38 | testargs = [ 39 | "evaluate_wmt.py", 40 | "patrickvonplaten/t5-tiny-random", 41 | str(tmp_source), 42 | str(output_file_name), 43 | str(tmp_target), 44 | str(score_file_name), 45 | ] 46 | 47 | with patch.object(sys, "argv", testargs): 48 | run_generate() 49 | self.assertTrue(Path(output_file_name).exists()) 50 | self.assertTrue(Path(score_file_name).exists()) 51 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_tiny 7 | 8 | This a albert_chinese_tiny model from [brightmart/albert_zh project](https://github.com/brightmart/albert_zh), albert_tiny_google_zh model 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 
| Since sentencepiece is not used in albert_chinese_tiny model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_tiny 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_tiny' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.40312355756759644` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_small 7 | 8 | This a albert_chinese_small model from [brightmart/albert_zh project](https://github.com/brightmart/albert_zh), albert_small_google_zh model 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_small model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_small 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_small' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.6390823125839233` 45 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-xlm-roberta-base/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow XLM-RoBERTa 2 | 3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow. 4 | 5 | ## XLM-RoBERTa 6 | 7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross lingual benchmarks. 8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5) 14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5) 15 | 16 | ## Usage 17 | 18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like: 19 | 20 | ```python 21 | from transformers import TFXLMRobertaModel 22 | 23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base") 24 | ``` 25 | Or 26 | ``` 27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large") 28 | ``` 29 | 30 | ## Huggingface model hub 31 | 32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 33 | 34 | ## Acknowledgments 35 | 36 | Thanks to all the Huggingface team for the support and their amazing library! 37 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-xlm-roberta-large/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow XLM-RoBERTa 2 | 3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow. 
4 | 5 | ## XLM-RoBERTa 6 | 7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross lingual benchmarks. 8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5) 14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5) 15 | 16 | ## Usage 17 | 18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like: 19 | 20 | ```python 21 | from transformers import TFXLMRobertaModel 22 | 23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base") 24 | ``` 25 | Or 26 | ``` 27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large") 28 | ``` 29 | 30 | ## Huggingface model hub 31 | 32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 33 | 34 | ## Acknowledgments 35 | 36 | Thanks to all the Huggingface team for the support and their amazing library! 37 | -------------------------------------------------------------------------------- /model_cards/allenai/biomed_roberta_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | thumbnail: https://huggingface.co/front/thumbnails/allenai.png 3 | --- 4 | 5 | # BioMed-RoBERTa-base 6 | 7 | BioMed-RoBERTa-base is a language model based on the RoBERTa-base (Liu et. al, 2019) architecture. We adapt RoBERTa-base to 2.68 million scientific papers from the [Semantic Scholar](https://www.semanticscholar.org) corpus via continued pretraining. This amounts to 7.55B tokens and 47GB of data. We use the full text of the papers in training, not just abstracts. 8 | 9 | Specific details of the adaptive pretraining procedure can be found in Gururangan et. al, 2020. 10 | 11 | 12 | ## Evaluation 13 | 14 | BioMed-RoBERTa achieves competitive performance to state of the art models on a number of NLP tasks in the biomedical domain (numbers are mean (standard deviation) over 3+ random seeds) 15 | 16 | 17 | | Task | Task Type | RoBERTa-base | BioMed-RoBERTa-base | 18 | |--------------|---------------------|--------------|---------------------| 19 | | RCT-180K | Text Classification | 86.4 (0.3) | 86.9 (0.2) | 20 | | ChemProt | Relation Extraction | 81.1 (1.1) | 83.0 (0.7) | 21 | | JNLPBA | NER | 74.3 (0.2) | 75.2 (0.1) | 22 | | BC5CDR | NER | 85.6 (0.1) | 87.8 (0.1) | 23 | | NCBI-Disease | NER | 86.6 (0.3) | 87.1 (0.8) | 24 | 25 | More evaluations TBD. 26 | 27 | ## Citation 28 | 29 | If using this model, please cite the following paper: 30 | 31 | ```bibtex 32 | @inproceedings{domains, 33 | author = {Suchin Gururangan and Ana Marasović and Swabha Swayamdipta and Kyle Lo and Iz Beltagy and Doug Downey and Noah A. 
Smith}, 34 | title = {Don't Stop Pretraining: Adapt Language Models to Domains and Tasks}, 35 | year = {2020}, 36 | booktitle = {Proceedings of ACL}, 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /model_cards/google/electra-large-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-large-generator", 24 | tokenizer="google/electra-large-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /model_cards/google/electra-small-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 
13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-small-generator", 24 | tokenizer="google/electra-small-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /model_cards/google/electra-base-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-base-generator", 24 | tokenizer="google/electra-base-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {fill_mask.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /examples/token-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install newest ptl. 
4 | pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/ 5 | # for seqeval metrics import 6 | pip install -r ../requirements.txt 7 | 8 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ 9 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 10 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ 11 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 12 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ 13 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 14 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py" 15 | export MAX_LENGTH=128 16 | export BERT_MODEL=bert-base-multilingual-cased 17 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 18 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 19 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 20 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 21 | export BATCH_SIZE=32 22 | export NUM_EPOCHS=3 23 | export SEED=1 24 | 25 | export OUTPUT_DIR_NAME=germeval-model 26 | export CURRENT_DIR=${PWD} 27 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 28 | mkdir -p $OUTPUT_DIR 29 | 30 | # Add parent directory to python path to access lightning_base.py 31 | export PYTHONPATH="../":"${PYTHONPATH}" 32 | 33 | python3 run_pl_ner.py --data_dir ./ \ 34 | --model_type bert \ 35 | --labels ./labels.txt \ 36 | --model_name_or_path $BERT_MODEL \ 37 | --output_dir $OUTPUT_DIR \ 38 | --max_seq_length $MAX_LENGTH \ 39 | --num_train_epochs $NUM_EPOCHS \ 40 | --train_batch_size $BATCH_SIZE \ 41 | --seed $SEED \ 42 | --do_train \ 43 | --do_predict -------------------------------------------------------------------------------- /model_cards/gaochangkuan/model_dir/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Chinese poetry by topic. 
2 | 3 | ```python 4 | from transformers import * 5 | 6 | tokenizer = BertTokenizer.from_pretrained("gaochangkuan/model_dir") 7 | 8 | model = AutoModelWithLMHead.from_pretrained("gaochangkuan/model_dir") 9 | 10 | 11 | prompt= '''田园躬耕''' 12 | 13 | length= 84 14 | stop_token='' 15 | 16 | temperature = 1.2 17 | 18 | repetition_penalty=1.3 19 | 20 | k= 30 21 | p= 0.95 22 | 23 | device ='cuda' 24 | seed=2020 25 | no_cuda=False 26 | 27 | prompt_text = prompt if prompt else input("Model prompt >>> ") 28 | 29 | encoded_prompt = tokenizer.encode( 30 | ''+prompt_text+'', 31 | add_special_tokens=False, 32 | return_tensors="pt" 33 | ) 34 | 35 | encoded_prompt = encoded_prompt.to(device) 36 | 37 | output_sequences = model.generate( 38 | input_ids=encoded_prompt, 39 | max_length=length, 40 | min_length=10, 41 | do_sample=True, 42 | early_stopping=True, 43 | num_beams=10, 44 | temperature=temperature, 45 | top_k=k, 46 | top_p=p, 47 | repetition_penalty=repetition_penalty, 48 | bad_words_ids=None, 49 | bos_token_id=tokenizer.bos_token_id, 50 | pad_token_id=tokenizer.pad_token_id, 51 | eos_token_id=tokenizer.eos_token_id, 52 | length_penalty=1.2, 53 | no_repeat_ngram_size=2, 54 | num_return_sequences=1, 55 | attention_mask=None, 56 | decoder_start_token_id=tokenizer.bos_token_id,) 57 | 58 | 59 | generated_sequence = output_sequences[0].tolist() 60 | text = tokenizer.decode(generated_sequence) 61 | 62 | 63 | text = text[: text.find(stop_token) if stop_token else None] 64 | 65 | print(''.join(text).replace(' ','').replace('','').replace('','')) 66 | ``` 67 | -------------------------------------------------------------------------------- /tests/test_tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from transformers.tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast 18 | 19 | from .test_tokenization_bert import BertTokenizationTest 20 | from .utils import slow 21 | 22 | 23 | class DistilBertTokenizationTest(BertTokenizationTest): 24 | 25 | tokenizer_class = DistilBertTokenizer 26 | 27 | def get_rust_tokenizer(self, **kwargs): 28 | return DistilBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs) 29 | 30 | @slow 31 | def test_sequence_builders(self): 32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 33 | 34 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 36 | 37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 39 | 40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 42 | tokenizer.sep_token_id 43 | ] 44 | -------------------------------------------------------------------------------- /model_cards/allegro/herbert-klej-cased-tokenizer-v1/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: polish 3 | --- 4 | 5 | # HerBERT tokenizer 6 | 7 | The **[HerBERT](https://en.wikipedia.org/wiki/Zbigniew_Herbert)** tokenizer is a character-level byte-pair encoding tokenizer with a 8 | vocabulary size of 50k tokens. It was trained on [Wolne Lektury](https://wolnelektury.pl/) and a publicly available subset of the 9 | [National Corpus of Polish](http://nkjp.pl/index.php?page=14&lang=0) with the [fastBPE](https://github.com/glample/fastBPE) library. 10 | The tokenizer uses the `XLMTokenizer` implementation from [transformers](https://github.com/huggingface/transformers). 11 | 12 | ## Tokenizer usage 13 | The HerBERT tokenizer should be used together with the [HerBERT model](https://huggingface.co/allegro/herbert-klej-cased-v1): 14 | ```python 15 | from transformers import XLMTokenizer, RobertaModel 16 | 17 | tokenizer = XLMTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1") 18 | model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1") 19 | 20 | encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt') 21 | outputs = model(encoded_input) 22 | ``` 23 | 24 | ## License 25 | CC BY-SA 4.0 26 | 27 | ## Citation 28 | If you use this tokenizer, please cite the following paper: 29 | ``` 30 | @misc{rybak2020klej, 31 | title={KLEJ: Comprehensive Benchmark for Polish Language Understanding}, 32 | author={Piotr Rybak and Robert Mroczkowski and Janusz Tracz and Ireneusz Gawlik}, 33 | year={2020}, 34 | eprint={2005.00630}, 35 | archivePrefix={arXiv}, 36 | primaryClass={cs.CL} 37 | } 38 | ``` 39 | The paper has been accepted at ACL 2020; we will update the BibTeX entry as soon as the proceedings appear. 40 | 41 | ## Authors 42 | The tokenizer was created by the **Allegro Machine Learning Research** team.
43 | 44 | You can contact us at: klejbenchmark@allegro.pl 45 | -------------------------------------------------------------------------------- /model_cards/nlptown/bert-base-multilingual-uncased-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - english 4 | - dutch 5 | - german 6 | - french 7 | - italian 8 | - spanish 9 | --- 10 | 11 | # bert-base-multilingual-uncased-sentiment 12 | 13 | This is a bert-base-multilingual-uncased model finetuned for sentiment analysis on product reviews in six languages: English, Dutch, German, French, Spanish and Italian. It predicts the sentiment of a review as a number of stars (between 1 and 5). 14 | 15 | This model is intended for direct use as a sentiment analysis model for product reviews in any of the six languages above, or for further finetuning on related sentiment analysis tasks. 16 | 17 | ## Training data 18 | 19 | Here is the number of product reviews we used for finetuning the model: 20 | 21 | | Language | Number of reviews | 22 | | -------- | ----------------- | 23 | | English | 150k | 24 | | Dutch | 80k | 25 | | German | 137k | 26 | | French | 140k | 27 | | Italian | 72k | 28 | | Spanish | 50k | 29 | 30 | ## Accuracy 31 | 32 | The finetuned model obtained the following accuracy on 5,000 held-out product reviews in each of the languages: 33 | 34 | - Accuracy (exact) is the exact match on the number of stars. 35 | - Accuracy (off-by-1) is the percentage of reviews where the number of stars the model predicts differs by a maximum of 1 from the number given by the human reviewer. 36 | 37 | 38 | | Language | Accuracy (exact) | Accuracy (off-by-1) | 39 | | -------- | ---------------------- | ------------------- | 40 | | English | 67% | 95% | 41 | | Dutch | 57% | 93% | 42 | | German | 61% | 94% | 43 | | French | 59% | 94% | 44 | | Italian | 59% | 95% | 45 | | Spanish | 58% | 95% | 46 | 47 | ## Contact 48 | 49 | Contact [NLP Town](https://www.nlp.town) for questions, feedback and/or requests for similar models. 50 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-base-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Base Discriminator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-discriminator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-discriminator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForPreTraining 34 | 35 | ```python 36 | import torch 37 | from transformers import ElectraForPreTraining, ElectraTokenizer 38 | 39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-base-discriminator") 40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 41 | 42 | sentence = "나는 방금 밥을 먹었다." 43 | fake_sentence = "나는 내일 밥을 먹었다." 44 | 45 | fake_tokens = tokenizer.tokenize(fake_sentence) 46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt") 47 | 48 | discriminator_outputs = discriminator(fake_inputs) 49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2) 50 | 51 | print(list(zip(fake_tokens, predictions.tolist()[1:-1]))) 52 | ``` 53 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-small-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Small Discriminator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-discriminator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-discriminator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForPreTraining 34 | 35 | ```python 36 | import torch 37 | from transformers import ElectraForPreTraining, ElectraTokenizer 38 | 39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-small-discriminator") 40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 41 | 42 | sentence = "나는 방금 밥을 먹었다." 43 | fake_sentence = "나는 내일 밥을 먹었다." 44 | 45 | fake_tokens = tokenizer.tokenize(fake_sentence) 46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt") 47 | 48 | discriminator_outputs = discriminator(fake_inputs) 49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2) 50 | 51 | print(list(zip(fake_tokens, predictions.tolist()[1:-1]))) 52 | ``` 53 | --------------------------------------------------------------------------------