├── tests ├── __init__.py ├── fixtures │ ├── empty.txt │ ├── dummy-config.json │ ├── input.txt │ ├── tests_samples │ │ ├── .gitignore │ │ ├── GermEval │ │ │ └── labels.txt │ │ ├── STS-B │ │ │ ├── train.tsv │ │ │ └── dev.tsv │ │ └── MRPC │ │ │ ├── dev.tsv │ │ │ └── train.tsv │ ├── spiece.model │ ├── test_sentencepiece.model │ └── hub-index.sample.json ├── test_adapter_saving.py ├── test_activations.py ├── test_adapter_fusion_saving.py ├── test_adapter_fusion_config.py ├── test_adapter_config.py ├── test_tokenization_utils.py └── test_tokenization_distilbert.py ├── MANIFEST.in ├── examples ├── summarization │ ├── __init__.py │ ├── t5 │ │ ├── __init__.py │ │ ├── download_cnn_daily_mail.py │ │ ├── README.md │ │ └── test_t5_examples.py │ ├── bart │ │ ├── __init__.py │ │ ├── run_train.sh │ │ └── run_train_tiny.sh │ └── bertabs │ │ ├── __init__.py │ │ └── requirements.txt ├── translation │ └── t5 │ │ ├── __init__.py │ │ └── test_t5_examples.py ├── ner │ └── .gitignore ├── distillation │ ├── requirements.txt │ └── training_configs │ │ ├── distilgpt2.json │ │ ├── distilbert-base-cased.json │ │ ├── distilbert-base-uncased.json │ │ ├── distilbert-base-multilingual-cased.json │ │ └── distilroberta-base.json ├── text-generation │ ├── pplm │ │ ├── imgs │ │ │ ├── wooly.png │ │ │ └── headfigure.png │ │ └── pplm_classification_head.py │ └── README.md ├── movement-pruning │ ├── emmental │ │ ├── modules │ │ │ └── __init__.py │ │ └── __init__.py │ └── requirements.txt ├── requirements.txt ├── contrib │ ├── README.md │ └── mm-imdb │ │ └── README.md ├── token-classification │ ├── test_ner_examples.py │ ├── run.sh │ └── run_pl.sh ├── text-classification │ └── run_pl.sh ├── benchmarking │ └── run_benchmark.py ├── adversarial │ └── README.md └── multiple-choice │ └── README.md ├── docs ├── source │ ├── examples.md │ ├── notebooks.md │ ├── favicon.ico │ ├── _static │ │ └── css │ │ │ ├── Calibre-Thin.otf │ │ │ ├── Calibre-Light.ttf │ │ │ ├── Calibre-Medium.otf │ │ │ ├── Calibre-Regular.otf │ │ │ └── code-snippets.css │ ├── imgs │ │ ├── transformers_logo_name.png │ │ ├── warmup_cosine_schedule.png │ │ ├── warmup_linear_schedule.png │ │ ├── warmup_constant_schedule.png │ │ ├── warmup_cosine_hard_restarts_schedule.png │ │ └── warmup_cosine_warm_restarts_schedule.png │ ├── main_classes │ │ ├── configuration.rst │ │ └── model.rst │ ├── model_doc │ │ ├── encoderdecoder.rst │ │ └── auto.rst │ └── bertology.rst └── Makefile ├── model_cards ├── bert-base-chinese-README.md ├── bert-large-cased-README.md ├── bart-large-cnn │ └── README.md ├── bart-large-xsum │ └── README.md ├── bert-base-german-dbmdz-cased-README.md ├── bert-base-german-dbmdz-uncased-README.md ├── google │ ├── bert_uncased_L-10_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-10_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-10_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-12_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-12_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-12_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-2_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-2_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-2_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-4_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-4_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-4_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-4_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-6_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-6_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-6_H-512_A-8 │ │ └── 
README.md │ ├── bert_uncased_L-6_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-8_H-128_A-2 │ │ └── README.md │ ├── bert_uncased_L-8_H-256_A-4 │ │ └── README.md │ ├── bert_uncased_L-8_H-512_A-8 │ │ └── README.md │ ├── bert_uncased_L-8_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-10_H-768_A-12 │ │ └── README.md │ ├── bert_uncased_L-12_H-768_A-12 │ │ └── README.md │ ├── reformer-crime-and-punishment │ │ └── README.md │ ├── electra-large-generator │ │ └── README.md │ ├── electra-small-generator │ │ └── README.md │ └── electra-base-generator │ │ └── README.md ├── facebook │ └── bart-large-cnn │ │ └── README.md ├── bert-base-multilingual-cased-README.md ├── distilbert-base-multilingual-cased-README.md ├── bert-base-multilingual-uncased-README.md ├── t5-11b-README.md ├── t5-3b-README.md ├── t5-base-README.md ├── t5-large-README.md ├── t5-small-README.md ├── severinsimmler │ └── literary-german-bert │ │ ├── kfold.png │ │ └── prosa-jahre.png ├── deepset │ ├── sentence_bert │ │ └── README.md │ └── bert-base-german-cased-oldvocab │ │ └── README.md ├── djstrong │ └── bg_cs_pl_ru_cased_L-12_H-768_A-12 │ │ └── README.md ├── gpt2-README.md ├── distilgpt2-README.md ├── roberta-base-README.md ├── xlm-roberta-base-README.md ├── albert-base-v1-README.md ├── albert-xxlarge-v2-README.md ├── bert-base-cased-README.md ├── bert-base-uncased-README.md ├── distilroberta-base-README.md ├── xlm-mlm-en-2048-README.md ├── distilbert-base-uncased-README.md ├── binwang │ └── xlnet-base-cased │ │ └── README.md ├── daigo │ └── bert-base-japanese-sentiment │ │ └── README.md ├── lvwerra │ ├── gpt2-medium-taboo │ │ └── README.md │ ├── bert-imdb │ │ └── README.md │ ├── gpt2-imdb │ │ └── README.md │ ├── gpt2-imdb-pos │ │ └── README.md │ └── gpt2-imdb-ctrl │ │ └── README.md ├── lysandre │ ├── arxiv │ │ └── README.md │ └── arxiv-nlp │ │ └── README.md ├── Hate-speech-CNERG │ ├── dehatebert-mono-arabic │ │ └── README.md │ └── dehatebert-mono-english │ │ └── README.md ├── jannesg │ └── bertsson │ │ └── README.md ├── DeepPavlov │ ├── rubert-base-cased │ │ └── README.md │ ├── bert-base-bg-cs-pl-ru-cased │ │ └── README.md │ ├── rubert-base-cased-conversational │ │ └── README.md │ ├── rubert-base-cased-sentence │ │ └── README.md │ ├── bert-base-multilingual-cased-sentence │ │ └── README.md │ └── bert-base-cased-conversational │ │ └── README.md ├── julien-c │ ├── bert-xsmall-dummy │ │ └── README.md │ ├── EsperBERTo-small-pos │ │ └── README.md │ ├── dummy-unknown │ │ └── README.md │ └── EsperBERTo-small │ │ └── README.md ├── spentaur │ └── yelp │ │ └── README.md ├── allenai │ ├── longformer-base-4096-extra.pos.embd.only │ │ └── README.md │ ├── scibert_scivocab_cased │ │ └── README.md │ ├── scibert_scivocab_uncased │ │ └── README.md │ ├── longformer-base-4096 │ │ └── README.md │ └── biomed_roberta_base │ │ └── README.md ├── codegram │ └── calbert-base-uncased │ │ └── README.md ├── clue │ ├── xlnet_chinese_large │ │ └── README.md │ ├── roberta_chinese_base │ │ └── README.md │ ├── roberta_chinese_large │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ └── albert_chinese_small │ │ └── README.md ├── ViktorAlm │ └── electra-base-norwegian-uncased-discriminator │ │ └── README.md ├── canwenxu │ └── BERT-of-Theseus-MNLI │ │ └── README.md ├── surajp │ └── albert-base-sanskrit │ │ └── README.md ├── wptoux │ └── albert-chinese-large-qa │ │ └── README.md ├── illuin │ ├── camembert-base-fquad │ │ └── README.md │ └── camembert-large-fquad │ │ └── README.md ├── jplu │ ├── tf-camembert-base │ │ └── README.md │ ├── 
tf-xlm-roberta-base │ │ └── README.md │ └── tf-xlm-roberta-large │ │ └── README.md ├── twmkn9 │ ├── albert-base-v2-squad2 │ │ └── README.md │ ├── bert-base-uncased-squad2 │ │ └── README.md │ ├── distilroberta-base-squad2 │ │ └── README.md │ └── distilbert-base-uncased-squad2 │ │ └── README.md ├── digitalepidemiologylab │ └── covid-twitter-bert │ │ └── README.md ├── fmikaelian │ ├── camembert-base-fquad │ │ └── README.md │ ├── camembert-base-squad │ │ └── README.md │ └── flaubert-base-uncased-squad │ │ └── README.md ├── activebus │ ├── BERT-DK_rest │ │ └── README.md │ ├── BERT-PT_rest │ │ └── README.md │ ├── BERT-PT_laptop │ │ └── README.md │ └── BERT-DK_laptop │ │ └── README.md ├── monologg │ ├── koelectra-base-generator │ │ └── README.md │ ├── koelectra-small-generator │ │ └── README.md │ ├── koelectra-base-discriminator │ │ └── README.md │ └── koelectra-small-discriminator │ │ └── README.md ├── ixa-ehu │ └── berteus-base-cased │ │ └── README.md ├── ahotrod │ └── roberta_large_squad2 │ │ └── README.md ├── valhalla │ └── t5-base-squad │ │ └── README.md ├── Tereveni-AI │ └── gpt2-124M-uk-fiction │ │ └── README.md ├── voidful │ ├── albert_chinese_base │ │ └── README.md │ ├── albert_chinese_large │ │ └── README.md │ ├── albert_chinese_xlarge │ │ └── README.md │ ├── albert_chinese_xxlarge │ │ └── README.md │ ├── albert_chinese_tiny │ │ └── README.md │ └── albert_chinese_small │ │ └── README.md ├── gaochangkuan │ └── model_dir │ │ └── README.md ├── allegro │ └── herbert-klej-cased-tokenizer-v1 │ │ └── README.md └── nlptown │ └── bert-base-multilingual-uncased-sentiment │ └── README.md ├── adapter_docs ├── logo.png ├── favicon.png ├── adapter_types.md ├── classes │ ├── adapter_modules.rst │ ├── adapter_config.rst │ ├── bert_mixins.rst │ ├── adapter_utils.rst │ ├── model_mixins.rst │ ├── weights_loaders.rst │ └── roberta.rst ├── _static │ └── custom.css ├── Makefile ├── README.md ├── installation.md └── make.bat ├── .coveragerc ├── src └── transformers │ ├── data │ ├── datasets │ │ └── __init__.py │ ├── processors │ │ └── __init__.py │ └── __init__.py │ ├── commands │ ├── __init__.py │ ├── transformers_cli.py │ └── download.py │ ├── benchmark │ └── __init__.py │ ├── trainer_utils.py │ ├── configuration_marian.py │ ├── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py │ ├── configuration_camembert.py │ ├── configuration_mmbt.py │ └── activations.py ├── templates └── adding_a_new_example_script │ └── README.md ├── .github ├── ISSUE_TEMPLATE │ ├── new-adapter-setup.md │ ├── feature-request.md │ └── bug-report.md └── workflows │ ├── adapter_docs_build.yml │ └── tests_torch.yml ├── deploy_multi_version_doc.sh ├── docker ├── transformers-pytorch-cpu │ └── Dockerfile ├── transformers-tensorflow-cpu │ └── Dockerfile ├── transformers-cpu │ └── Dockerfile ├── transformers-pytorch-gpu │ └── Dockerfile ├── transformers-tensorflow-gpu │ └── Dockerfile └── transformers-gpu │ └── Dockerfile ├── setup.cfg ├── .circleci └── deploy.sh └── Makefile /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | 
-------------------------------------------------------------------------------- /examples/summarization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/translation/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/bart/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarization/bertabs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /tests/fixtures/dummy-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_type": "roberta" 3 | } -------------------------------------------------------------------------------- /model_cards/bert-base-chinese-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | -------------------------------------------------------------------------------- /model_cards/bert-large-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: apache-2.0 3 | --- 4 | -------------------------------------------------------------------------------- /examples/ner/.gitignore: -------------------------------------------------------------------------------- 1 | *.tmp 2 | cached_* 3 | *.txt 4 | preprocess.* 5 | *.ps1 6 | -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /model_cards/bart-large-cnn/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | --- 5 | 6 | -------------------------------------------------------------------------------- /model_cards/bart-large-xsum/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | --- 5 | 6 | -------------------------------------------------------------------------------- /examples/summarization/bertabs/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /model_cards/bert-base-german-dbmdz-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | license: mit 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/bert-base-german-dbmdz-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | license: mit 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | 
../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-2_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-4_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-512_A-8/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-6_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-128_A-2/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-256_A-4/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-512_A-8/README.md: 
-------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-8_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/facebook/bart-large-cnn/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | 5 | license: mit 6 | --- 7 | -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-10_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /model_cards/google/bert_uncased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.txt 5 | !*.tsv 6 | !*.json 7 | !.gitignore -------------------------------------------------------------------------------- /adapter_docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/logo.png -------------------------------------------------------------------------------- /docs/source/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/favicon.ico -------------------------------------------------------------------------------- /adapter_docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/favicon.png -------------------------------------------------------------------------------- /model_cards/bert-base-multilingual-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | 4 | license: apache-2.0 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-multilingual-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | license: apache-2.0 4 | --- 5 | -------------------------------------------------------------------------------- /model_cards/bert-base-multilingual-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: multilingual 3 | 4 | license: apache-2.0 5 | --- 6 | -------------------------------------------------------------------------------- /model_cards/t5-11b-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | 
--- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-3b-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-large-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /model_cards/t5-small-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - summarization 4 | - translation 5 | 6 | license: apache-2.0 7 | --- 8 | 9 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /adapter_docs/adapter_types.md: -------------------------------------------------------------------------------- 1 | # Adapter Types 2 | 3 | TODO write something about different adapter types and configurations. 4 | -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /examples/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | gitpython==3.0.2 4 | tensorboard>=1.14.0 5 | tensorboardX==1.8 6 | psutil==5.6.6 7 | scipy==1.3.1 8 | -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/text-generation/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/kfold.png -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .binarizer import MagnitudeBinarizer, 
ThresholdBinarizer, TopKBinarizer 3 | from .masked_nn import MaskedLinear 4 | -------------------------------------------------------------------------------- /model_cards/severinsimmler/literary-german-bert/prosa-jahre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png -------------------------------------------------------------------------------- /model_cards/deepset/sentence_bert/README.md: -------------------------------------------------------------------------------- 1 | This is an upload of the bert-base-nli-stsb-mean-tokens pretrained model from the Sentence Transformers Repo (https://github.com/UKPLab/sentence-transformers) 2 | -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_modules.rst: -------------------------------------------------------------------------------- 1 | Adapter Modules 2 | =============== 3 | 4 | Classes implementing task and language adapters. 5 | 6 | .. automodule:: transformers.adapter_modeling 7 | :members: 8 | -------------------------------------------------------------------------------- /model_cards/djstrong/bg_cs_pl_ru_cased_L-12_H-768_A-12/README.md: -------------------------------------------------------------------------------- 1 | Slavic BERT from https://github.com/deepmipt/Slavic-BERT-NER http://files.deeppavlov.ai/deeppavlov_data/bg_cs_pl_ru_cased_L-12_H-768_A-12.tar.gz 2 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | scikit-learn 3 | seqeval 4 | psutil 5 | sacrebleu 6 | rouge-score 7 | tensorflow_datasets 8 | pytorch-lightning==0.7.3 # April 10, 2020 release 9 | matplotlib 10 | -------------------------------------------------------------------------------- /examples/movement-pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.4.0 2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers 3 | knockknock>=0.1.8.1 4 | h5py>=2.10.0 5 | numpy>=1.18.2 6 | scipy>=1.4.1 7 | -------------------------------------------------------------------------------- /model_cards/gpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_config.rst: -------------------------------------------------------------------------------- 1 | Model Adapters Config 2 | ======================= 3 | 4 | This class manages the setup and configuration of adapter modules in a pre-trained model. 5 | 6 | .. autoclass:: transformers.ModelAdaptersConfig 7 | :members: 8 | -------------------------------------------------------------------------------- /adapter_docs/classes/bert_mixins.rst: -------------------------------------------------------------------------------- 1 | BERT Mixins 2 | ==================== 3 | 4 | These classes added to the BERT module classes add support for adapters to all BERT-based transformer models. 5 | 6 | .. 
automodule:: transformers.adapter_bert 7 | :members: 8 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- /model_cards/distilgpt2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /adapter_docs/classes/adapter_utils.rst: -------------------------------------------------------------------------------- 1 | Adapter Utilities 2 | ==================== 3 | 4 | A collection of utility methods mainly related to searching and loading adapter modules from 5 | Adapter-Hub. 6 | 7 | ..
automodule:: transformers.adapter_utils 8 | :members: 9 | -------------------------------------------------------------------------------- /model_cards/xlm-roberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: mit 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-base-v1-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/albert-xxlarge-v2-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /model_cards/bert-base-cased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/bert-base-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/distilroberta-base-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/xlm-mlm-en-2048-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: cc-by-nc-4.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/distilbert-base-uncased-README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - exbert 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /model_cards/binwang/xlnet-base-cased/README.md: -------------------------------------------------------------------------------- 1 | This is a pre-trained **XLNet** model with 12 layers. 2 | 3 | It accompanies the paper SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models 4 | 5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding) 6 | -------------------------------------------------------------------------------- /examples/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Community contributed examples 2 | 3 | This folder contains examples which are not actively maintained (mostly contributed by the community). 4 | 5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working.
6 | -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } -------------------------------------------------------------------------------- /src/transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import GlueDataset, GlueDataTrainingArguments 6 | from .language_modeling import LineByLineTextDataset, TextDataset 7 | -------------------------------------------------------------------------------- /model_cards/daigo/bert-base-japanese-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - japanese 4 | --- 5 | 6 | binary classification 7 | 8 | # Usage 9 | ``` 10 | print(pipeline("sentiment-analysis",model="daigo/bert-base-japanese-sentiment",tokenizer="daigo/bert-base-japanese-sentiment")("私は幸福である。")) 11 | 12 | [{'label': 'ポジティブ', 'score': 0.98430425}] 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/GermEval/labels.txt: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOCderiv 3 | B-LOCpart 4 | B-ORG 5 | B-ORGderiv 6 | B-ORGpart 7 | B-OTH 8 | B-OTHderiv 9 | B-OTHpart 10 | B-PER 11 | B-PERderiv 12 | B-PERpart 13 | I-LOC 14 | I-LOCderiv 15 | I-LOCpart 16 | I-ORG 17 | I-ORGderiv 18 | I-ORGpart 19 | I-OTH 20 | I-OTHderiv 21 | I-OTHpart 22 | I-PER 23 | I-PERderiv 24 | I-PERpart 25 | O 26 | -------------------------------------------------------------------------------- /examples/movement-pruning/emmental/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .configuration_bert_masked import MaskedBertConfig 3 | from .modeling_bert_masked import ( 4 | MaskedBertForMultipleChoice, 5 | MaskedBertForQuestionAnswering, 6 | MaskedBertForSequenceClassification, 7 | MaskedBertForTokenClassification, 8 | MaskedBertModel, 9 | ) 10 | from .modules import * 11 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 28996 14 | } 15 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-medium-taboo/README.md: -------------------------------------------------------------------------------- 1 | # GPT-2 (medium) Taboo 2 | 3 | ## What is it? 4 | A fine-tuned GPT-2 version for Taboo cards generation. 
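A minimal usage sketch for this checkpoint (the model name is taken from this card's path; the prompt and sampling settings below are illustrative assumptions, not values from the card):

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned checkpoint and its tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-medium-taboo")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-medium-taboo")

# Prompt the model with the beginning of a Taboo-style card and sample a completion
input_ids = tokenizer.encode("Describe the word", return_tensors="pt")
output = model.generate(input_ids, max_length=48, do_sample=True, top_k=50, top_p=0.95)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```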
5 | 6 | ## Training setting 7 | 8 | The model was trained on ~900 Taboo cards in the following format for 100 epochs: 9 | ``` 10 | Describe the word Glitch without using the words Problem, Unexpected, Technology, Minor, Outage. 11 | ``` 12 | 13 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from argparse import ArgumentParser 3 | 4 | 5 | class BaseTransformersCLICommand(ABC): 6 | @staticmethod 7 | @abstractmethod 8 | def register_subcommand(parser: ArgumentParser): 9 | raise NotImplementedError() 10 | 11 | @abstractmethod 12 | def run(self): 13 | raise NotImplementedError() 14 | -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-multilingual-cased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 119547 14 | } 15 | -------------------------------------------------------------------------------- /model_cards/lysandre/arxiv/README.md: -------------------------------------------------------------------------------- 1 | # ArXiv GPT-2 checkpoint 2 | 3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from physics fields. 4 | 5 | ## Training data 6 | 7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields. 8 | -------------------------------------------------------------------------------- /src/transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from ..file_utils import is_torch_available 6 | 7 | 8 | if is_torch_available(): 9 | from .benchmark_args import PyTorchBenchmarkArguments 10 | from .benchmark import PyTorchBenchmark 11 | -------------------------------------------------------------------------------- /model_cards/lysandre/arxiv-nlp/README.md: -------------------------------------------------------------------------------- 1 | # ArXiv-NLP GPT-2 checkpoint 2 | 3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from the computational linguistics field.
4 | 5 | ## Training data 6 | 7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field. -------------------------------------------------------------------------------- /examples/distillation/training_configs/distilroberta-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 50265, 3 | "hidden_size": 768, 4 | "num_hidden_layers": 6, 5 | "num_attention_heads": 12, 6 | "intermediate_size": 3072, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 514, 11 | "type_vocab_size": 1, 12 | "initializer_range": 0.02, 13 | "layer_norm_eps": 0.00001 14 | } -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-arabic/README.md: -------------------------------------------------------------------------------- 1 | This model is used for detecting **hate speech** in the **Arabic language**. The mono in the name refers to the monolingual setting, where the model is trained using only Arabic language data. It is fine-tuned from the multilingual BERT model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.8674776 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | -------------------------------------------------------------------------------- /model_cards/Hate-speech-CNERG/dehatebert-mono-english/README.md: -------------------------------------------------------------------------------- 1 | This model is used for detecting **hate speech** in the **English language**. The mono in the name refers to the monolingual setting, where the model is trained using only English language data. It is fine-tuned from the multilingual BERT model. 2 | The model is trained with different learning rates and the best validation score achieved is 0.7069374 for a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT) 3 | -------------------------------------------------------------------------------- /docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ---------------------------------------------------- 3 | 4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PretrainedConfig`` 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | .. autoclass:: transformers.PretrainedConfig 10 | :members: 11 | -------------------------------------------------------------------------------- /model_cards/jannesg/bertsson/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: swedish 3 | --- 4 | 5 | # BERTSSON Models 6 | 7 | The models are trained on: 8 | - Government Text 9 | - Swedish Literature 10 | - Swedish News 11 | 12 | Corpus size: Roughly 6B tokens. 13 | 14 | The following models are currently available: 15 | 16 | - **bertsson** - A BERT base model trained with the same hyperparameters as first published by Google. 17 | 18 | All models are cased and trained with whole word masking. 19 | 20 | Stay tuned for evaluations.
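As a rough usage sketch (the model name comes from this card's path and the example sentence is purely illustrative), the checkpoint can be queried for masked-token predictions:

```python
from transformers import pipeline

# Load the published BERT checkpoint and its tokenizer from the model hub
fill_mask = pipeline("fill-mask", model="jannesg/bertsson", tokenizer="jannesg/bertsson")

# [MASK] is BERT's mask token; the Swedish prompt here is only an example
print(fill_mask("Stockholm är huvudstaden i [MASK]."))
```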
21 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | # How to add a new example script in 🤗Transformers 2 | 3 | This folder provides a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library. 4 | 5 | Currently, only examples for PyTorch are provided. They are adaptations of the library's SQuAD examples and implement single-GPU and distributed training with gradient accumulation and mixed precision (using NVIDIA's apex library) to cover a reasonable range of use cases. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-adapter-setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31F New adapter setup" 3 | about: Submit a proposal/request to implement a new adapter setup or to add a new model 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟 New adapter setup 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them, if possible by @gh-username) 21 | -------------------------------------------------------------------------------- /adapter_docs/classes/model_mixins.rst: -------------------------------------------------------------------------------- 1 | Model Mixins 2 | ======================= 3 | 4 | These classes provide the basis for integrating adapter modules into model classes, including functionality such as adapter saving and loading. 5 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class. 6 | 7 | ModelAdaptersMixin 8 | ------------------ 9 | 10 | .. autoclass:: transformers.ModelAdaptersMixin 11 | :members: 12 | 13 | ModelWithHeadsAdaptersMixin 14 | --------------------------- 15 | 16 | .. autoclass:: transformers.ModelWithHeadsAdaptersMixin 17 | :members: 18 | -------------------------------------------------------------------------------- /examples/summarization/bart/run_train.sh: -------------------------------------------------------------------------------- 1 | export OUTPUT_DIR_NAME=bart_sum 2 | export CURRENT_DIR=${PWD} 3 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 4 | 5 | # Make output directory if it doesn't exist 6 | mkdir -p $OUTPUT_DIR 7 | 8 | # Add parent directory to python path to access lightning_base.py 9 | export PYTHONPATH="../../":"${PYTHONPATH}" 10 | 11 | python finetune.py \ 12 | --data_dir=./cnn-dailymail/cnn_dm \ 13 | --model_name_or_path=bart-large \ 14 | --learning_rate=3e-5 \ 15 | --train_batch_size=4 \ 16 | --eval_batch_size=4 \ 17 | --output_dir=$OUTPUT_DIR \ 18 | --do_train $@ 19 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased 7 | 8 | RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on the Russian part of Wikipedia and news data.
We used this training data to build a vocabulary of Russian subtokens and took a multilingual version of BERT‑base as an initialization for RuBERT\[1\]. 9 | 10 | 11 | \[1\]: Kuratov, Y., Arkhipov, M. \(2019\). Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language. arXiv preprint [arXiv:1905.07213](https://arxiv.org/abs/1905.07213). 12 | -------------------------------------------------------------------------------- /tests/fixtures/hub-index.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "t": { 3 | "s": { 4 | "default": "path/to/default", 5 | "bb1c8efb82510bed": { 6 | "default": "path/to/pfeiffer/default", 7 | "versions": { 8 | "example-org": "path/to/pfeiffer/example-org", 9 | "ukp": "path/to/pfeiffer/ukp" 10 | } 11 | }, 12 | "b1017368d7a97b11": { 13 | "versions": { 14 | "example-org": "path/to/houlsby/example-org" 15 | } 16 | } 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 9 | -------------------------------------------------------------------------------- /examples/text-generation/README.md: -------------------------------------------------------------------------------- 1 | ## Language generation 2 | 3 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py). 4 | 5 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL. 6 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you 7 | can try out the different models available in the library. 8 | 9 | Example usage: 10 | 11 | ```bash 12 | python run_generation.py \ 13 | --model_type=gpt2 \ 14 | --model_name_or_path=gpt2 15 | ``` 16 | -------------------------------------------------------------------------------- /src/transformers/trainer_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, NamedTuple, Optional 2 | 3 | import numpy as np 4 | 5 | 6 | class EvalPrediction(NamedTuple): 7 | """ 8 | Evaluation output (always contains labels), to be used 9 | to compute metrics.
10 | """ 11 | 12 | predictions: np.ndarray 13 | label_ids: np.ndarray 14 | 15 | 16 | class PredictionOutput(NamedTuple): 17 | predictions: np.ndarray 18 | label_ids: Optional[np.ndarray] 19 | metrics: Optional[Dict[str, float]] 20 | 21 | 22 | class TrainOutput(NamedTuple): 23 | global_step: int 24 | training_loss: float 25 | 26 | 27 | PREFIX_CHECKPOINT_DIR = "checkpoint" 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /deploy_multi_version_doc.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ ! -z "$2" ] 7 | then 8 | echo "Pushing version" $2 9 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 10 | else 11 | echo "Pushing master" 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 13 | fi 14 | } 15 | 16 | deploy_doc "master" 17 | deploy_doc "b33a385" v1.0.0 18 | deploy_doc "fe02e45" v1.1.0 19 | deploy_doc "89fd345" v1.2.0 20 | deploy_doc "fc9faa8" v2.0.0 21 | deploy_doc "3ddce1d" v2.1.1 22 | deploy_doc "f2f3294" v2.2.0 23 | deploy_doc "d0f8b9a" v2.3.0 24 | -------------------------------------------------------------------------------- /model_cards/lvwerra/bert-imdb/README.md: -------------------------------------------------------------------------------- 1 | # BERT-IMDB 2 | 3 | ## What is it? 4 | BERT (`bert-large-cased`) trained for sentiment classification on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). 5 | 6 | ## Training setting 7 | 8 | The model was trained on 80% of the IMDB dataset for sentiment classification for three epochs with a learning rate of `1e-5` with the `simpletransformers` library. The library uses a learning rate schedule. 9 | 10 | ## Result 11 | The model achieved 90% classification accuracy on the validation set. 12 | 13 | ## Reference 14 | The full experiment is available in the [tlr repo](https://lvwerra.github.io/trl/03-bert-imdb-training/). 15 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-bg-cs-pl-ru-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - bulgarian 4 | - czech 5 | - polish 6 | - russian 7 | --- 8 | 9 | # bert-base-bg-cs-pl-ru-cased 10 | 11 | SlavicBERT\[1\] \(Slavic \(bg, cs, pl, ru\), cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on Russian News and four Wikipedias: Bulgarian, Czech, Polish, and Russian. 
Subtoken vocabulary was built using this data. Multilingual BERT was used as an initialization for SlavicBERT. 12 | 13 | 14 | \[1\]: Arkhipov M., Trofimova M., Kuratov Y., Sorokin A. \(2019\). [Tuning Multilingual Transformers for Language-Specific Named Entity Recognition](https://www.aclweb.org/anthology/W19-3712/). ACL anthology W19-3712. 15 | -------------------------------------------------------------------------------- /adapter_docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* The search field on top of the toc tree */ 2 | /* Mobile header */ 3 | .wy-side-nav-search, .wy-nav-top { 4 | background: #39B3C6; 5 | } 6 | /* toc tree text */ 7 | .wy-menu-vertical header, 8 | .wy-menu-vertical p.caption { 9 | color: #39B3C6 10 | } 11 | /* toc tree activated link */ 12 | .wy-menu-vertical a:active { 13 | background-color:#39B3C6; 14 | } 15 | /* Links */ 16 | a { 17 | color: #39B3C6 18 | } 19 | /* Source spans */ 20 | .rst-content .viewcode-link, .rst-content .viewcode-back{ 21 | color: #39B3C6; 22 | } 23 | /* The literal code blocks */ 24 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 25 | color: #39B3C6; 26 | } 27 | -------------------------------------------------------------------------------- /adapter_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docker/transformers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 
24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-tensorflow-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow-cpu 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow-cpu \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /docker/transformers-pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | torch 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /adapter_docs/README.md: -------------------------------------------------------------------------------- 1 | # The adapter-transformers documentation 2 | 3 | This is the documentation of the adapter-related parts of the transformers library and the Adapter-Hub. Huggingface's documentation of the base library is located in the `/docs` folder. 4 | 5 | ## Installing & Building 6 | 7 | Building the documentation requires some additional packages installed. You can install them by running the following command in the root folder: 8 | 9 | ```bash 10 | pip install -e ".[docs]" 11 | ``` 12 | 13 | Cleaning and regenerating the documentation files can be done using `sphinx` by running the following command in the `/adapter_docs` folder: 14 | 15 | ```bash 16 | make clean && make html 17 | ``` 18 | 19 | The build output will be located in `/adapter_docs/_build/html`. 
20 | -------------------------------------------------------------------------------- /docker/transformers-tensorflow-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | mkl \ 18 | tensorflow 19 | 20 | WORKDIR /workspace 21 | COPY . transformers/ 22 | RUN cd transformers/ && \ 23 | python3 -m pip install --no-cache-dir . 24 | 25 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /examples/text-generation/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClassificationHead(torch.nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super().__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size)) 13 | self.mlp = torch.nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = F.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/contrib/mm-imdb/README.md: -------------------------------------------------------------------------------- 1 | ## MM-IMDb 2 | 3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/contrib/mm-imdb/run_mmimdb.py). 4 | 5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata. 6 | 7 | ### Training on MM-IMDb 8 | 9 | ``` 10 | python run_mmimdb.py \ 11 | --data_dir /path/to/mmimdb/dataset/ \ 12 | --model_type bert \ 13 | --model_name_or_path bert-base-uncased \ 14 | --output_dir /path/to/save/dir/ \ 15 | --do_train \ 16 | --do_eval \ 17 | --max_seq_len 512 \ 18 | --gradient_accumulation_steps 20 \ 19 | --num_image_embeds 3 \ 20 | --num_train_epochs 100 \ 21 | --patience 5 22 | ``` 23 | 24 | -------------------------------------------------------------------------------- /docker/transformers-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="transformers" 4 | 5 | RUN apt update && \ 6 | apt install -y bash \ 7 | build-essential \ 8 | git \ 9 | curl \ 10 | ca-certificates \ 11 | python3 \ 12 | python3-pip && \ 13 | rm -rf /var/lib/apt/lists 14 | 15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 16 | python3 -m pip install --no-cache-dir \ 17 | jupyter \ 18 | tensorflow \ 19 | torch 20 | 21 | WORKDIR /workspace 22 | COPY . transformers/ 23 | RUN cd transformers/ && \ 24 | python3 -m pip install --no-cache-dir . 
25 | 26 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | ensure_newline_before_comments = True 3 | force_grid_wrap = 0 4 | include_trailing_comma = True 5 | known_first_party = transformers 6 | known_third_party = 7 | absl 8 | fairseq 9 | fastprogress 10 | git 11 | h5py 12 | MeCab 13 | nltk 14 | numpy 15 | packaging 16 | PIL 17 | psutil 18 | pytorch_lightning 19 | rouge_score 20 | sacrebleu 21 | seqeval 22 | sklearn 23 | tensorboardX 24 | tensorflow 25 | tensorflow_datasets 26 | timeout_decorator 27 | torch 28 | torchtext 29 | torchvision 30 | torch_xla 31 | tqdm 32 | 33 | line_length = 119 34 | lines_after_imports = 2 35 | multi_line_output = 3 36 | use_parentheses = True 37 | 38 | [flake8] 39 | ignore = E203, E501, E741, W503 40 | max-line-length = 119 41 | -------------------------------------------------------------------------------- /model_cards/julien-c/bert-xsmall-dummy/README.md: -------------------------------------------------------------------------------- 1 | ## How to build a dummy model 2 | 3 | 4 | ```python 5 | from transformers.configuration_bert import BertConfig 6 | from transformers.modeling_bert import BertForMaskedLM 7 | from transformers.modeling_tf_bert import TFBertForMaskedLM 8 | from transformers.tokenization_bert import BertTokenizer 9 | 10 | 11 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy" 12 | DIRNAME = "./bert-xsmall-dummy" 13 | 14 | config = BertConfig(10, 20, 1, 1, 40) 15 | 16 | model = BertForMaskedLM(config) 17 | model.save_pretrained(DIRNAME) 18 | 19 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True) 20 | tf_model.save_pretrained(DIRNAME) 21 | 22 | # Slightly different for tokenizer. 23 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME) 24 | # tokenizer.save_pretrained() 25 | ``` 26 | -------------------------------------------------------------------------------- /model_cards/spentaur/yelp/README.md: -------------------------------------------------------------------------------- 1 | # DistilBERT Yelp Review Sentiment 2 | This model is used for sentiment analysis on English Yelp reviews. 3 | It is a DistilBERT model trained on 1 million reviews from the Yelp Open Dataset. 4 | It is a regression model, with outputs in the range of ~-2 to ~2, where -2 corresponds to 1 star and 2 corresponds to 5 stars. 5 | It was trained using the [ktrain](https://github.com/amaiya/ktrain) library because of its ease of use. 6 | 7 | Example use: 8 | 9 | ``` 10 | tokenizer = AutoTokenizer.from_pretrained( 11 | 'distilbert-base-uncased', use_fast=True) 12 | model = TFAutoModelForSequenceClassification.from_pretrained( 13 | "spentaur/yelp") 14 | 15 | review = "This place is great!" 16 | input_ids = tokenizer.encode(review, return_tensors='tf') 17 | pred = model(input_ids)[0][0][0].numpy() 18 | # pred should be ~1.9562385 19 | ``` 20 | -------------------------------------------------------------------------------- /.circleci/deploy.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ !
-z "$2" ] 7 | then 8 | if [ -d "$dir/$2" ]; then 9 | echo "Directory" $2 "already exists" 10 | else 11 | echo "Pushing version" $2 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 13 | fi 14 | else 15 | echo "Pushing master" 16 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 17 | fi 18 | } 19 | 20 | deploy_doc "master" 21 | deploy_doc "b33a385" v1.0.0 22 | deploy_doc "fe02e45" v1.1.0 23 | deploy_doc "89fd345" v1.2.0 24 | deploy_doc "fc9faa8" v2.0.0 25 | deploy_doc "3ddce1d" v2.1.1 26 | deploy_doc "3616209" v2.2.0 27 | deploy_doc "d0f8b9a" v2.3.0 28 | deploy_doc "6664ea9" v2.4.0 29 | deploy_doc "fb560dc" v2.5.0 30 | -------------------------------------------------------------------------------- /adapter_docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Our *adapter-transformers* package is a drop-in replacement for Huggingface's *transformers* library. As the original package, it is tested on Python 3.6+ and PyTorch 1.1.0+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first. 4 | 5 | ## Using pip (from GitHub) 6 | 7 | The simplest way of installation is by using pip to install the package from our GitHub repository: 8 | 9 | ``` 10 | pip install git+https://github.com/adapter-hub/adapter-transformers.git 11 | ``` 12 | 13 | ## From repository 14 | 15 | Alternatively, you can clone the repository first and install the package from source. 16 | This allows you to run the included example scripts: 17 | 18 | ``` 19 | git clone https://github.com/adapter-hub/adapter-transformers.git 20 | cd adapter-transformers 21 | pip install . 22 | ``` 23 | -------------------------------------------------------------------------------- /src/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .metrics import is_sklearn_available 6 | from .processors import ( 7 | DataProcessor, 8 | InputExample, 9 | InputFeatures, 10 | SingleSentenceClassificationProcessor, 11 | SquadExample, 12 | SquadFeatures, 13 | SquadV1Processor, 14 | SquadV2Processor, 15 | glue_convert_examples_to_features, 16 | glue_output_modes, 17 | glue_processors, 18 | glue_tasks_num_labels, 19 | squad_convert_examples_to_features, 20 | xnli_output_modes, 21 | xnli_processors, 22 | xnli_tasks_num_labels, 23 | ) 24 | 25 | 26 | if is_sklearn_available(): 27 | from .metrics import glue_compute_metrics, xnli_compute_metrics 28 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB 2 | 3 | ## What is it? 4 | A GPT2 (`gpt2`) language model fine-tuned on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). 5 | 6 | ## Training setting 7 | 8 | The GPT2 language model was fine-tuned for 1 epoch on the IMDB dataset. 
All comments were joined into a single text file separated by the EOS token: 9 | 10 | ``` 11 | import pandas as pd 12 | df = pd.read_csv("imdb-dataset.csv") 13 | imdb_str = " <|endoftext|> ".join(df['review'].tolist()) 14 | 15 | with open('imdb.txt', 'w') as f: 16 | f.write(imdb_str) 17 | ``` 18 | 19 | To train the model, the `run_language_modeling.py` script from the `transformers` library was used: 20 | 21 | ``` 22 | python run_language_modeling.py 23 | --train_data_file imdb.txt 24 | --output_dir gpt2-imdb 25 | --model_type gpt2 26 | --model_name_or_path gpt2 27 | ``` 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Submit a proposal/request for a new adapter-transformers feature 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🚀 Feature request 11 | 12 | 14 | 15 | ## Motivation 16 | 17 | 20 | 21 | ## Your contribution 22 | 23 | 26 | -------------------------------------------------------------------------------- /adapter_docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096-extra.pos.embd.only/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096-extra.pos.embd.only 3 | 4 | This model is similar to `longformer-base-4096`, but it was pretrained to preserve RoBERTa weights by freezing all RoBERTa weights and training only the additional position embeddings. 5 | 6 | 7 | ### Citing 8 | 9 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 10 | ``` 11 | @article{Beltagy2020Longformer, 12 | title={Longformer: The Long-Document Transformer}, 13 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 14 | journal={arXiv:2004.05150}, 15 | year={2020}, 16 | } 17 | ``` 18 | 19 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 20 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.
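As an illustration (this snippet is not part of the original model card), the checkpoint can be loaded with the standard Auto classes of the `transformers` library; this is a minimal sketch, assuming the hub id matches the card path:

```python
from transformers import AutoModel, AutoTokenizer

# Hypothetical usage sketch for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")
model = AutoModel.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")

input_ids = tokenizer.encode("A long document ...", return_tensors="pt")
last_hidden_state = model(input_ids)[0]  # (batch_size, sequence_length, hidden_size)
```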
21 | -------------------------------------------------------------------------------- /docs/source/main_classes/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to: 7 | 8 | - resize the input token embeddings when new tokens are added to the vocabulary 9 | - prune the attention heads of the model. 10 | 11 | ``PreTrainedModel`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.PreTrainedModel 15 | :members: 16 | 17 | ``Helper Functions`` 18 | ~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autofunction:: transformers.apply_chunking_to_forward 21 | 22 | 23 | ``TFPreTrainedModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.TFPreTrainedModel 27 | :members: 28 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased-conversational 7 | 8 | Conversational RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on OpenSubtitles\[1\], [Dirty](https://d3.ru/), [Pikabu](https://pikabu.ru/), and a Social Media segment of Taiga corpus\[2\]. We assembled a new vocabulary for Conversational RuBERT model on this data and initialized the model with [RuBERT](../rubert-base-cased). 9 | 10 | 11 | \[1\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 12 | 13 | \[2\]: Shavrina T., Shapovalova O. \(2017\) TO THE METHODOLOGY OF CORPUS CONSTRUCTION FOR MACHINE LEARNING: «TAIGA» SYNTAX TREE CORPUS AND PARSER. in proc. of “CORPORA2017”, international conference , Saint-Petersbourg, 2017. 
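For illustration (this snippet is not part of the original card), the model can be loaded with the standard Auto classes of the `transformers` library, assuming the hub id matches the card path:

```python
from transformers import AutoModel, AutoTokenizer

# Hypothetical usage sketch for DeepPavlov/rubert-base-cased-conversational.
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased-conversational")
model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased-conversational")

input_ids = tokenizer.encode("привет, как дела?", return_tensors="pt")
last_hidden_state = model(input_ids)[0]
```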
14 | -------------------------------------------------------------------------------- /tests/test_adapter_saving.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import ADAPTER_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel 4 | 5 | from .utils import require_torch 6 | 7 | 8 | @require_torch 9 | class AdapterModelTest(unittest.TestCase): 10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel] 11 | 12 | def test_model_config_serialization(self): 13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict 14 | 15 | See, e.g., PretrainedConfig.to_json_string() 16 | """ 17 | for model_class in self.model_classes: 18 | for k, v in ADAPTER_CONFIG_MAP.items(): 19 | model_config = model_class.config_class 20 | model = model_class(model_config()) 21 | model.add_adapter("test", adapter_type=AdapterType.text_task, config=v) 22 | # should not raise an exception 23 | model.config.to_json_string() 24 | -------------------------------------------------------------------------------- /.github/workflows/adapter_docs_build.yml: -------------------------------------------------------------------------------- 1 | name: Build Adapter Docs 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths: [ 'adapter_docs/**' ] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | submodules: recursive 15 | - uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.6 18 | - name: Install 19 | run: | 20 | pip install .[tf,torch,docs] 21 | - name: Build 22 | run: | 23 | cd adapter_docs && make html && cd .. 24 | - name: Deploy 25 | uses: peaceiris/actions-gh-pages@v3 26 | with: 27 | github_token: ${{ secrets.GITHUB_TOKEN }} 28 | user_name: "Adapter-Hub-Bert" 29 | user_email: "---" 30 | publish_dir: ./adapter_docs/_build/html 31 | publish_branch: gh-pages 32 | force_orphan: true 33 | cname: docs.adapterhub.ml 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test test-examples 2 | 3 | # Check that source code meets quality standards 4 | 5 | quality: 6 | black --check --line-length 119 --target-version py35 examples templates tests src utils 7 | isort --check-only --recursive examples templates tests src utils 8 | flake8 examples templates tests src utils 9 | 10 | # Format source code automatically 11 | 12 | style: 13 | black --line-length 119 --target-version py35 examples templates tests src utils 14 | isort --recursive examples templates tests src utils 15 | 16 | # Run tests for the library 17 | 18 | test: 19 | python -m pytest -n auto --dist=loadfile -s -v ./tests/ 20 | 21 | test-reduced: 22 | python -m pytest -n auto --dist=loadfile -s -v\ 23 | --ignore-glob='tests/test_tokenization*'\ 24 | --ignore-glob='tests/test_pipelines*'\ 25 | --ignore-glob='tests/test_hf*'\ 26 | --ignore-glob='tests/test_doc*'\ 27 | ./tests/ 28 | 29 | # Run tests for examples 30 | 31 | test-examples: 32 | python -m pytest -n auto --dist=loadfile -s -v ./examples/ 33 | -------------------------------------------------------------------------------- /model_cards/codegram/calbert-base-uncased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: catalan 3 | --- 4 | 5 | # CALBERT: a Catalan Language Model 6 | 7 | ## Introduction 8 | 
9 | CALBERT is an open-source language model for Catalan based on the ALBERT architecture. 10 | 11 | It is now available on Hugging Face in its `base-uncased` version, and was pretrained on the [OSCAR dataset](https://traces1.inria.fr/oscar/). 12 | 13 | For further information or requests, please go to the [GitHub repository](https://github.com/codegram/calbert). 14 | 15 | ## Pre-trained models 16 | 17 | | Model | Arch. | Training data | 18 | |-------------------------------------|------------------|-----------------------------------| 19 | | `codegram` / `calbert-base-uncased` | Base (uncased) | OSCAR (4.3 GB of text) | 20 | 21 | 22 | ## Authors 23 | 24 | CALBERT was trained and evaluated by [Txus Bach](https://twitter.com/txustice), as part of [Codegram](https://www.codegram.com)'s applied research. 25 | 26 | -------------------------------------------------------------------------------- /examples/summarization/bart/run_train_tiny.sh: -------------------------------------------------------------------------------- 1 | # Script for verifying that run_bart_sum can be invoked from its directory 2 | 3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test) 4 | wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/cnn_tiny.tgz 5 | tar -xzvf cnn_tiny.tgz 6 | rm cnn_tiny.tgz 7 | 8 | export OUTPUT_DIR_NAME=bart_utest_output 9 | export CURRENT_DIR=${PWD} 10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 11 | 12 | # Make output directory if it doesn't exist 13 | mkdir -p $OUTPUT_DIR 14 | 15 | # Add parent directory to python path to access lightning_base.py and utils.py 16 | export PYTHONPATH="../../":"${PYTHONPATH}" 17 | python finetune.py \ 18 | --data_dir=cnn_tiny/ \ 19 | --model_type=bart \ 20 | --model_name_or_path=sshleifer/bart-tiny-random \ 21 | --learning_rate=3e-5 \ 22 | --train_batch_size=2 \ 23 | --eval_batch_size=2 \ 24 | --output_dir=$OUTPUT_DIR \ 25 | --num_train_epochs=1 \ 26 | --n_gpu=0 \ 27 | --do_train $@ 28 | 29 | rm -rf cnn_tiny 30 | rm -rf $OUTPUT_DIR 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/rubert-base-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - russian 4 | --- 5 | 6 | # rubert-base-cased-sentence 7 | 8 | Sentence RuBERT \(Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters\) is a representation‑based sentence encoder for Russian. It is initialized with RuBERT and fine‑tuned on SNLI\[1\] Google-translated to Russian and on the Russian part of the XNLI dev set\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\]. 9 | 10 | 11 | \[1\]: S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. \(2015\) A large annotated corpus for learning natural language inference. arXiv preprint [arXiv:1508.05326](https://arxiv.org/abs/1508.05326) 12 | 13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053) 14 | 15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks.
arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import is_torch_available 4 | 5 | from .utils import require_torch 6 | 7 | 8 | if is_torch_available(): 9 | from transformers.activations import _gelu_python, get_activation, gelu_new 10 | import torch 11 | 12 | 13 | @require_torch 14 | class TestActivations(unittest.TestCase): 15 | def test_gelu_versions(self): 16 | x = torch.Tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) 17 | torch_builtin = get_activation("gelu") 18 | self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item()) 19 | self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item()) 20 | 21 | def test_get_activation(self): 22 | get_activation("swish") 23 | get_activation("relu") 24 | get_activation("tanh") 25 | get_activation("gelu_new") 26 | get_activation("gelu_fast") 27 | with self.assertRaises(KeyError): 28 | get_activation("bogus") 29 | with self.assertRaises(KeyError): 30 | get_activation(None) 31 | -------------------------------------------------------------------------------- /src/transformers/configuration_marian.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The OPUS-NMT Team, Marian team, and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Marian model configuration """ 16 | 17 | from .configuration_bart import BartConfig 18 | 19 | 20 | PRETRAINED_CONFIG_ARCHIVE_MAP = { 21 | "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json", 22 | } 23 | 24 | 25 | class MarianConfig(BartConfig): 26 | model_type = "marian" 27 | -------------------------------------------------------------------------------- /model_cards/clue/xlnet_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## xlnet_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** xlnet-large 10 | **Model size:** 1.3G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 
18 | 19 | ### Usage 20 | 21 | ``` 22 | import torch 23 | from transformers import XLNetTokenizer,XLNetModel 24 | tokenizer = XLNetTokenizer.from_pretrained("clue/xlnet_chinese_large") 25 | xlnet = XLNetModel.from_pretrained("clue/xlnet_chinese_large") 26 | ``` 27 | 28 | ### About CLUE benchmark 29 | 30 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 31 | 32 | Github: https://github.com/CLUEbenchmark 33 | Website: https://www.cluebenchmarks.com/ 34 | -------------------------------------------------------------------------------- /adapter_docs/classes/weights_loaders.rst: -------------------------------------------------------------------------------- 1 | Weights Loaders 2 | ======================= 3 | 4 | These classes perform the extraction, saving and loading of module weights to and from the file system. 5 | All type-specific loader classes inherit from the common ``WeightsLoader`` base class which can also be extended 6 | to add support for additional custom modules. 7 | 8 | These classes provide the basis of adapter module integration into model classes such as adapter saving and loading. 9 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class. 10 | 11 | WeightsLoader 12 | ------------------ 13 | 14 | .. autoclass:: transformers.WeightsLoader 15 | :members: 16 | 17 | AdapterLoader 18 | --------------------------- 19 | 20 | .. autoclass:: transformers.AdapterLoader 21 | :members: 22 | 23 | PredictionHeadLoader 24 | --------------------------- 25 | 26 | .. autoclass:: transformers.PredictionHeadLoader 27 | :members: 28 | 29 | WeightsLoaderHelper 30 | ------------------- 31 | 32 | .. autoclass:: transformers.WeightsLoaderHelper 33 | :members: 34 | -------------------------------------------------------------------------------- /model_cards/ViktorAlm/electra-base-norwegian-uncased-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: norwegian 3 | thumbnail: https://i.imgur.com/QqSEC5I.png 4 | --- 5 | 6 | # Norwegian Electra 7 | ![Image of norwegian electra](https://i.imgur.com/QqSEC5I.png) 8 | 9 | Trained on Oscar + wikipedia + opensubtitles + some other data I had with the awesome power of TPUs(V3-8) 10 | 11 | Use with caution. I have no downstream tasks in Norwegian to test on so I have no idea of its performance yet. 12 | # Model 13 | ## Electra: Pre-training Text Encoders as Discriminators Rather Than Generators 14 | Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning 15 | - https://openreview.net/pdf?id=r1xMH1BtvB 16 | - https://github.com/google-research/electra 17 | # Acknowledgments 18 | ### TensorFlow Research Cloud 19 | Research supported with Cloud TPUs from Google's TensorFlow Research Cloud (TFRC). 
Thanks for providing access to the TFRC ❤️ 20 | - https://www.tensorflow.org/tfrc 21 | #### OSCAR corpus 22 | - https://oscar-corpus.com/ 23 | #### OPUS 24 | - http://opus.nlpl.eu/ 25 | - http://www.opensubtitles.org/ 26 | -------------------------------------------------------------------------------- /src/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | 6 | from transformers.file_utils import WEIGHTS_NAME 7 | 8 | 9 | DIALOGPT_MODELS = ["small", "medium", "large"] 10 | 11 | OLD_KEY = "lm_head.decoder.weight" 12 | NEW_KEY = "lm_head.weight" 13 | 14 | 15 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str): 16 | d = torch.load(checkpoint_path) 17 | d[NEW_KEY] = d.pop(OLD_KEY) 18 | os.makedirs(pytorch_dump_folder_path, exist_ok=True) 19 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)) 20 | 21 | 22 | if __name__ == "__main__": 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--dialogpt_path", default=".", type=str) 25 | args = parser.parse_args() 26 | for MODEL in DIALOGPT_MODELS: 27 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") 28 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" 29 | convert_dialogpt_checkpoint( 30 | checkpoint_path, pytorch_dump_folder_path, 31 | ) 32 | -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-multilingual-cased-sentence/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - multilingual 4 | --- 5 | 6 | # bert-base-multilingual-cased-sentence 7 | 8 | Sentence Multilingual BERT \(101 languages, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) is a representation‑based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine‑tuned on English MultiNLI\[1\] and on the dev set of multilingual XNLI\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\].
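To make the mean-pooling recipe concrete, here is a small sketch (an illustrative addition, not from the original card); it assumes the checkpoint loads through the standard `transformers` Auto classes:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/bert-base-multilingual-cased-sentence")
model = AutoModel.from_pretrained("DeepPavlov/bert-base-multilingual-cased-sentence")

inputs = tokenizer.encode_plus("A sentence to embed.", return_tensors="pt")
token_embeddings = model(**inputs)[0]                  # (1, seq_len, hidden_size)
mask = inputs["attention_mask"].unsqueeze(-1).float()  # (1, seq_len, 1)
sentence_embedding = (token_embeddings * mask).sum(1) / mask.sum(1)
```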
arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084) 16 | -------------------------------------------------------------------------------- /tests/test_adapter_fusion_saving.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel 4 | 5 | from .utils import require_torch 6 | 7 | 8 | @require_torch 9 | class AdapterFusionModelTest(unittest.TestCase): 10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel] 11 | 12 | def test_model_config_serialization(self): 13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict 14 | 15 | See, e.g., PretrainedConfig.to_json_string() 16 | """ 17 | for model_class in self.model_classes: 18 | for k, v in ADAPTERFUSION_CONFIG_MAP.items(): 19 | model_config = model_class.config_class 20 | model = model_class(model_config()) 21 | model.add_adapter("test1", AdapterType.text_task) 22 | model.add_adapter("test2", AdapterType.text_task) 23 | model.add_fusion(["test1", "test2"], adapter_fusion_config=v) 24 | # should not raise an exception 25 | model.config.to_json_string() 26 | -------------------------------------------------------------------------------- /examples/token-classification/test_ner_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import unittest 4 | from unittest.mock import patch 5 | 6 | import run_ner 7 | 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | logger = logging.getLogger() 12 | 13 | 14 | class ExamplesTests(unittest.TestCase): 15 | def test_run_ner(self): 16 | stream_handler = logging.StreamHandler(sys.stdout) 17 | logger.addHandler(stream_handler) 18 | 19 | testargs = """ 20 | --model_name distilbert-base-german-cased 21 | --output_dir ./tests/fixtures/tests_samples/temp_dir 22 | --overwrite_output_dir 23 | --data_dir ./tests/fixtures/tests_samples/GermEval 24 | --labels ./tests/fixtures/tests_samples/GermEval/labels.txt 25 | --max_seq_length 128 26 | --num_train_epochs 6 27 | --logging_steps 1 28 | --do_train 29 | --do_eval 30 | """.split() 31 | with patch.object(sys, "argv", ["run.py"] + testargs): 32 | result = run_ner.main() 33 | self.assertLess(result["eval_loss"], 1.5) 34 | -------------------------------------------------------------------------------- /model_cards/deepset/bert-base-german-cased-oldvocab/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: german 3 | thumbnail: https://static.tildacdn.com/tild6438-3730-4164-b266-613634323466/german_bert.png 4 | tags: 5 | - exbert 6 | --- 7 | 8 | 9 | 10 | 11 | 12 | # German BERT with old vocabulary 13 | For details see the related [FARM issue](https://github.com/deepset-ai/FARM/issues/60). 14 | 15 | 16 | ## About us 17 | ![deepset logo](https://raw.githubusercontent.com/deepset-ai/FARM/master/docs/img/deepset_logo.png) 18 | 19 | We bring NLP to the industry via open source! 20 | Our focus: Industry specific language models & large scale QA systems. 
21 | 22 | Some of our work: 23 | - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert) 24 | - [FARM](https://github.com/deepset-ai/FARM) 25 | - [Haystack](https://github.com/deepset-ai/haystack/) 26 | 27 | Get in touch: 28 | [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Website](https://deepset.ai) 29 | -------------------------------------------------------------------------------- /docs/source/model_doc/encoderdecoder.rst: -------------------------------------------------------------------------------- 1 | Encoder Decoder Models 2 | ----------- 3 | 4 | This class can wrap an encoder model, such as ``BertModel``, and a decoder model with a language modeling head, such as ``BertForMaskedLM``, into an encoder-decoder model. 5 | 6 | The ``EncoderDecoderModel`` class allows instantiating an encoder-decoder model from a pretrained encoder and a pretrained decoder using the ``from_encoder_decoder_pretrained`` class method. 7 | The ``EncoderDecoderModel`` is saved using the standard ``save_pretrained()`` method and can be loaded again using the standard ``from_pretrained()`` method. 8 | 9 | An application of this architecture could be *summarization* using two pretrained BERT models, as shown in the paper: `Text Summarization with Pretrained Encoders `_ by Yang Liu and Mirella Lapata. 10 | 11 | 12 | ``EncoderDecoderConfig`` 13 | ~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.EncoderDecoderConfig 16 | :members: 17 | 18 | 19 | ``EncoderDecoderModel`` 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.EncoderDecoderModel 23 | :members: 24 | -------------------------------------------------------------------------------- /examples/text-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | # Install newest ptl. 2 | pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/ 3 | # Install example requirements 4 | pip install -r ../requirements.txt 5 | 6 | # Download glue data 7 | python3 ../../utils/download_glue_data.py 8 | 9 | export TASK=mrpc 10 | export DATA_DIR=./glue_data/MRPC/ 11 | export MAX_LENGTH=128 12 | export LEARNING_RATE=2e-5 13 | export BERT_MODEL=bert-base-cased 14 | export BATCH_SIZE=32 15 | export NUM_EPOCHS=3 16 | export SEED=2 17 | export OUTPUT_DIR_NAME=mrpc-pl-bert 18 | export CURRENT_DIR=${PWD} 19 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 20 | 21 | # Make output directory if it doesn't exist 22 | mkdir -p $OUTPUT_DIR 23 | # Add parent directory to python path to access lightning_base.py 24 | export PYTHONPATH="../":"${PYTHONPATH}" 25 | 26 | python3 run_pl_glue.py --data_dir $DATA_DIR \ 27 | --task $TASK \ 28 | --model_name_or_path $BERT_MODEL \ 29 | --output_dir $OUTPUT_DIR \ 30 | --max_seq_length $MAX_LENGTH \ 31 | --learning_rate $LEARNING_RATE \ 32 | --num_train_epochs $NUM_EPOCHS \ 33 | --train_batch_size $BATCH_SIZE \ 34 | --seed $SEED \ 35 | --do_train \ 36 | --do_predict 37 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/train.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000 3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute.
A man is playing a flute. 3.800 4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800 5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600 6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250 7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250 8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500 9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600 10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200 11 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/STS-B/dev.tsv: -------------------------------------------------------------------------------- 1 | index genre filename year old_index source1 source2 sentence1 sentence2 score 2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000 3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750 4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000 5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400 6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750 7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615 8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000 9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333 10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750 11 | -------------------------------------------------------------------------------- /model_cards/canwenxu/BERT-of-Theseus-MNLI/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | thumbnail: https://raw.githubusercontent.com/JetRunner/BERT-of-Theseus/master/bert-of-theseus.png 3 | --- 4 | 5 | # BERT-of-Theseus 6 | See our paper ["BERT-of-Theseus: Compressing BERT by Progressive Module Replacing"](http://arxiv.org/abs/2002.02925). 7 | 8 | BERT-of-Theseus is a new compressed BERT by progressively replacing the components of the original BERT. 9 | 10 | ![BERT of Theseus](https://github.com/JetRunner/BERT-of-Theseus/blob/master/bert-of-theseus.png?raw=true) 11 | 12 | ## Load Pretrained Model on MNLI 13 | 14 | We provide a 6-layer pretrained model on MNLI as a general-purpose model, which can transfer to other sentence classification tasks, outperforming DistillBERT (with the same 6-layer structure) on six tasks of GLUE (dev set). 
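For illustration (this snippet is not part of the original card), a minimal loading sketch, assuming the hub repo ships a compatible tokenizer and works with the standard Auto classes for sequence classification; the GLUE dev-set comparison follows:

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hypothetical usage sketch for canwenxu/BERT-of-Theseus-MNLI.
tokenizer = AutoTokenizer.from_pretrained("canwenxu/BERT-of-Theseus-MNLI")
model = AutoModelForSequenceClassification.from_pretrained("canwenxu/BERT-of-Theseus-MNLI")
```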
15 | 16 | | Method | MNLI | MRPC | QNLI | QQP | RTE | SST-2 | STS-B | 17 | |-----------------|------|------|------|------|------|-------|-------| 18 | | BERT-base | 83.5 | 89.5 | 91.2 | 89.8 | 71.1 | 91.5 | 88.9 | 19 | | DistillBERT | 79.0 | 87.5 | 85.3 | 84.9 | 59.9 | 90.7 | 81.2 | 20 | | BERT-of-Theseus | 82.1 | 87.5 | 88.8 | 88.8 | 70.1 | 91.8 | 87.8 | 21 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## roberta_chinese_base 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-base 10 | **Model size:** 392M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_base") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_base") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/clue/roberta_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## roberta_chinese_large 6 | 7 | ### Overview 8 | 9 | **Language model:** roberta-large 10 | **Model size:** 1.2G 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, BertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_large") 27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_large") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 
33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/surajp/albert-base-sanskrit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: sanskrit 3 | --- 4 | 5 | 6 | # ALBERT-base-Sanskrit 7 | 8 | 9 | Explanation Notebook Colab: [SanskritALBERT.ipynb](https://colab.research.google.com/github/parmarsuraj99/suraj-parmar/blob/master/_notebooks/2020-05-02-SanskritALBERT.ipynb) 10 | 11 | Size of the model is **46MB** 12 | 13 | Example of usage: 14 | 15 | ``` 16 | tokenizer = AutoTokenizer.from_pretrained("surajp/albert-base-sanskrit") 17 | model = AutoModel.from_pretrained("surajp/albert-base-sanskrit") 18 | 19 | enc=tokenizer.encode("ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥") 20 | print(tokenizer.decode(enc)) 21 | 22 | ps = model(torch.tensor(enc).unsqueeze(1)) 23 | print(ps[0].shape) 24 | ``` 25 | ``` 26 | ''' 27 | Output: 28 | -------- 29 | [CLS] ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥[SEP] 30 | torch.Size([28, 1, 768]) 31 | ``` 32 | 33 | 34 | > Created by [Suraj Parmar/@parmarsuraj99](https://twitter.com/parmarsuraj99) 35 | 36 | > Made with ❤️ in India 37 | -------------------------------------------------------------------------------- /examples/benchmarking/run_benchmark.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ Benchmarking the library on inference and training """ 17 | 18 | from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments 19 | 20 | 21 | def main(): 22 | parser = HfArgumentParser(PyTorchBenchmarkArguments) 23 | benchmark_args = parser.parse_args_into_dataclasses()[0] 24 | benchmark = PyTorchBenchmark(args=benchmark_args) 25 | benchmark.run() 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /model_cards/wptoux/albert-chinese-large-qa/README.md: -------------------------------------------------------------------------------- 1 | # albert-chinese-large-qa 2 | ALBERT large QA model pretrained on the Baidu WebQA and Baidu DuReader datasets. 3 | 4 | ## Data source 5 | + Baidu WebQA 1.0 6 | + Baidu DuReader 7 | 8 | ## Training Method 9 | We combined the two datasets and created a new dataset in SQuAD format, including 705139 samples for training and 69638 samples for validation. We fine-tuned the model based on the ALBERT Chinese large model.
11 | 12 | ## Hyperparams 13 | + learning_rate 1e-5 14 | + max_seq_length 512 15 | + max_query_length 50 16 | + max_answer_length 300 17 | + doc_stride 256 18 | + num_train_epochs 2 19 | + warmup_steps 1000 20 | + per_gpu_train_batch_size 8 21 | + gradient_accumulation_steps 3 22 | + n_gpu 2 (Nvidia Tesla P100) 23 | 24 | ## Usage 25 | ``` 26 | from transformers import AutoModelForQuestionAnswering, BertTokenizer 27 | 28 | model = AutoModelForQuestionAnswering.from_pretrained('wptoux/albert-chinese-large-qa') 29 | tokenizer = BertTokenizer.from_pretrained('wptoux/albert-chinese-large-qa') 30 | ``` 31 | ***Important: use BertTokenizer*** 32 | 33 | ## MoreInfo 34 | Please visit https://github.com/wptoux/albert-chinese-large-webqa for details. 35 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## albert_chinese_tiny 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-tiny 10 | **Model size:** 16M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_tiny` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_tiny") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_tiny") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_cased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
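As an illustrative addition (not part of the original card), the checkpoints can typically be loaded with the standard Auto classes, using the model ids implied by the card paths:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_cased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_cased")
```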
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/clue/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: chinese 3 | --- 4 | 5 | ## albert_chinese_small 6 | 7 | ### Overview 8 | 9 | **Language model:** albert-small 10 | **Model size:** 18.5M 11 | **Language:** Chinese 12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020) 13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE) 14 | 15 | ### Results 16 | 17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE). 18 | 19 | ### Usage 20 | 21 | **NOTE:**Since sentencepiece is not used in `albert_chinese_small` model, you have to call **BertTokenizer** instead of AlbertTokenizer !!! 22 | 23 | ``` 24 | import torch 25 | from transformers import BertTokenizer, AlbertModel 26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_small") 27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_small") 28 | ``` 29 | 30 | ### About CLUE benchmark 31 | 32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard. 33 | 34 | Github: https://github.com/CLUEbenchmark 35 | Website: https://www.cluebenchmarks.com/ 36 | -------------------------------------------------------------------------------- /model_cards/allenai/scibert_scivocab_uncased/README.md: -------------------------------------------------------------------------------- 1 | # SciBERT 2 | 3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text. 4 | 5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts. 6 | 7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions. 8 | 9 | Available models include: 10 | * `scibert_scivocab_cased` 11 | * `scibert_scivocab_uncased` 12 | 13 | 14 | The original repo can be found [here](https://github.com/allenai/scibert). 
15 | 16 | If using these models, please cite the following paper: 17 | ``` 18 | @inproceedings{beltagy-etal-2019-scibert, 19 | title = "SciBERT: A Pretrained Language Model for Scientific Text", 20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman", 21 | booktitle = "EMNLP", 22 | year = "2019", 23 | publisher = "Association for Computational Linguistics", 24 | url = "https://www.aclweb.org/anthology/D19-1371" 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /model_cards/julien-c/EsperBERTo-small-pos/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: esperanto 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png 4 | --- 5 | 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto 7 | 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥 9 | 10 | ## Training Details 11 | 12 | - current checkpoint: 566000 13 | - machine name: `galinette` 14 | 15 | 16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png) 17 | 18 | ## Example pipeline 19 | 20 | ```python 21 | from transformers import TokenClassificationPipeline, pipeline 22 | 23 | 24 | MODEL_PATH = "./models/EsperBERTo-small-pos/" 25 | 26 | nlp = pipeline( 27 | "ner", 28 | model=MODEL_PATH, 29 | tokenizer=MODEL_PATH, 30 | ) 31 | # or instantiate a TokenClassificationPipeline directly. 32 | 33 | nlp("Mi estas viro kej estas tago varma.") 34 | 35 | # {'entity': 'PRON', 'score': 0.9979867339134216, 'word': ' Mi'} 36 | # {'entity': 'VERB', 'score': 0.9683094620704651, 'word': ' estas'} 37 | # {'entity': 'VERB', 'score': 0.9797462821006775, 'word': ' estas'} 38 | # {'entity': 'NOUN', 'score': 0.8509314060211182, 'word': ' tago'} 39 | # {'entity': 'ADJ', 'score': 0.9996201395988464, 'word': ' varma'} 40 | ``` -------------------------------------------------------------------------------- /model_cards/DeepPavlov/bert-base-cased-conversational/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - english 4 | --- 5 | 6 | # bert-base-cased-conversational 7 | 8 | Conversational BERT \(English, cased, 12‑layer, 768‑hidden, 12‑heads, 110M parameters\) was trained on the English part of Twitter, Reddit, DailyDialogues\[1\], OpenSubtitles\[2\], Debates\[3\], Blogs\[4\], Facebook News Comments. We used this training data to build the vocabulary of English subtokens and took English cased version of BERT‑base as an initialization for English Conversational BERT. 9 | 10 | 11 | \[1\]: Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. IJCNLP 2017. 12 | 13 | \[2\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\) 14 | 15 | \[3\]: Justine Zhang, Ravi Kumar, Sujith Ravi, Cristian Danescu-Niculescu-Mizil. Proceedings of NAACL, 2016. 16 | 17 | \[4\]: J. Schler, M. Koppel, S. Argamon and J. Pennebaker \(2006\). Effects of Age and Gender on Blogging in Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs. 
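A minimal usage sketch (the hub id `DeepPavlov/bert-base-cased-conversational` is taken from this model card's path; the utterance is made up):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/bert-base-cased-conversational")
model = AutoModel.from_pretrained("DeepPavlov/bert-base-cased-conversational")

# Contextual embeddings for a conversational utterance.
input_ids = tokenizer.encode("hey, how is it going?", return_tensors="pt")
outputs = model(input_ids)
print(outputs[0].shape)  # (batch_size, sequence_length, 768)
```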
18 | -------------------------------------------------------------------------------- /examples/summarization/t5/download_cnn_daily_mail.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | import tensorflow_datasets as tfds 5 | 6 | 7 | def main(input_path, reference_path, data_dir): 8 | cnn_ds = tfds.load("cnn_dailymail", split="test", shuffle_files=False, data_dir=data_dir) 9 | cnn_ds_iter = tfds.as_numpy(cnn_ds) 10 | 11 | test_articles_file = Path(input_path).open("w") 12 | test_summaries_file = Path(reference_path).open("w") 13 | 14 | for example in cnn_ds_iter: 15 | test_articles_file.write(example["article"].decode("utf-8") + "\n") 16 | test_articles_file.flush() 17 | test_summaries_file.write(example["highlights"].decode("utf-8").replace("\n", " ") + "\n") 18 | test_summaries_file.flush() 19 | 20 | 21 | if __name__ == "__main__": 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("input_path", type=str, help="where to save the articles input data") 24 | parser.add_argument( 25 | "reference_path", type=str, help="where to save the reference summaries", 26 | ) 27 | parser.add_argument( 28 | "--data_dir", type=str, default="~/tensorflow_datasets", help="where to save the tensorflow datasets.", 29 | ) 30 | args = parser.parse_args() 31 | main(args.input_path, args.reference_path, args.data_dir) 32 | -------------------------------------------------------------------------------- /model_cards/illuin/camembert-base-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-fquad 6 | 7 | ## Description 8 | 9 | A native French Question Answering model [CamemBERT-base](https://camembert-model.fr/) fine-tuned on [FQuAD](https://fquad.illuin.tech/). 10 | 11 | ## Evaluation results 12 | 13 | On the development set. 14 | 15 | ```shell 16 | {"f1": 88.1, "exact_match": 78.1} 17 | ``` 18 | 19 | On the test set. 20 | 21 | ```shell 22 | {"f1": 88.3, "exact_match": 78.0} 23 | ``` 24 | 25 | ## Usage 26 | 27 | ```python 28 | from transformers import pipeline 29 | 30 | nlp = pipeline('question-answering', model='illuin/camembert-base-fquad', tokenizer='illuin/camembert-base-fquad') 31 | 32 | nlp({ 33 | 'question': "Qui est Claude Monet?", 34 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 35 | }) 36 | ``` 37 | 38 | ## Citation 39 | 40 | If you use our work, please cite: 41 | 42 | ```bibtex 43 | @article{dHoffschmidt2020FQuADFQ, 44 | title={FQuAD: French Question Answering Dataset}, 45 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl'e and Quentin Heinrich}, 46 | journal={ArXiv}, 47 | year={2020}, 48 | volume={abs/2002.06071} 49 | } 50 | ``` 51 | -------------------------------------------------------------------------------- /model_cards/allenai/longformer-base-4096/README.md: -------------------------------------------------------------------------------- 1 | 2 | # longformer-base-4096 3 | [Longformer](https://arxiv.org/abs/2004.05150) is a transformer model for long documents. 4 | 5 | `longformer-base-4096` is a BERT-like model started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096. 6 | 7 | Longformer uses a combination of a sliding window (local) attention and global attention. 
Global attention is user-configured based on the task to allow the model to learn task-specific representations. 8 | Please refer to the examples in `modeling_longformer.py` and the paper for more details on how to set global attention. 9 | 10 | 11 | ### Citing 12 | 13 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150). 14 | ``` 15 | @article{Beltagy2020Longformer, 16 | title={Longformer: The Long-Document Transformer}, 17 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan}, 18 | journal={arXiv:2004.05150}, 19 | year={2020}, 20 | } 21 | ``` 22 | 23 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org). 24 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering. 25 | -------------------------------------------------------------------------------- /src/transformers/commands/transformers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from argparse import ArgumentParser 3 | 4 | from transformers.commands.convert import ConvertCommand 5 | from transformers.commands.download import DownloadCommand 6 | from transformers.commands.env import EnvironmentCommand 7 | from transformers.commands.run import RunCommand 8 | from transformers.commands.serving import ServeCommand 9 | from transformers.commands.user import UserCommands 10 | 11 | 12 | def main(): 13 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli []") 14 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers") 15 | 16 | # Register commands 17 | ConvertCommand.register_subcommand(commands_parser) 18 | DownloadCommand.register_subcommand(commands_parser) 19 | EnvironmentCommand.register_subcommand(commands_parser) 20 | RunCommand.register_subcommand(commands_parser) 21 | ServeCommand.register_subcommand(commands_parser) 22 | UserCommands.register_subcommand(commands_parser) 23 | 24 | # Let's go 25 | args = parser.parse_args() 26 | 27 | if not hasattr(args, "func"): 28 | parser.print_help() 29 | exit(1) 30 | 31 | # Run 32 | service = args.func(args) 33 | service.run() 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-camembert-base/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow CamemBERT 2 | 3 | In this repository you will find different versions of the CamemBERT model for Tensorflow. 4 | 5 | ## CamemBERT 6 | 7 | [CamemBERT](https://camembert-model.fr/) is a state-of-the-art language model for French based on the RoBERTa architecture pretrained on the French subcorpus of the newly available multilingual corpus OSCAR. 
8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-camembert-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/tf_model.h5) 14 | 15 | ## Usage 16 | 17 | With Transformers >= 2.4 the Tensorflow models of CamemBERT can be loaded like: 18 | 19 | ```python 20 | from transformers import TFCamembertModel 21 | 22 | model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base") 23 | ``` 24 | 25 | ## Huggingface model hub 26 | 27 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 28 | 29 | ## Acknowledgments 30 | 31 | Thanks to all the Huggingface team for the support and their amazing library! 32 | -------------------------------------------------------------------------------- /model_cards/google/reformer-crime-and-punishment/README.md: -------------------------------------------------------------------------------- 1 | ## Reformer Model trained on "Crime and Punishment" 2 | 3 | Crime and Punishment is a novel written by Fyodor Dostoevsky and was translated into English. 4 | 5 | Crime and Punishment training data was taken from `gs://trax-ml/reformer/crime-and-punishment-2554.txt` and contains 6 | roughly 0.5M tokens. 7 | 8 | The ReformerLM model was trained in flax using colab notebook proposed by authors: https://colab.research.google.com/github/google/trax/blob/master/trax/models/reformer/text_generation.ipynb and the weights were converted to Hugging Face's PyTorch ReformerLM model `ReformerModelWithLMHead`. 9 | 10 | The model is a language model that operates on small sub-word units. Text can be generated as follows: 11 | 12 | ```python 13 | model = ReformerModelWithLMHead.from_pretrained("patrickvonplaten/reformer-crime-and-punish") 14 | tok = ReformerTokenizer.from_pretrained("patrickvonplaten/reformer-crime-and-punish") 15 | tok.decode(model.generate(tok.encode("A few months later", return_tensors="pt"), do_sample=True,temperature=0.7, max_length=100)[0]) 16 | 17 | # gives:'A few months later on was more than anything in the flat. 18 | # “I have already.” “That’s not my notion that he had forgotten him. 19 | # What does that matter? And why do you mean? 
It’s only another fellow,” he said as he went out, as though he want' 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/test_adapter_fusion_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import FrozenInstanceError 3 | 4 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterFusionConfig 5 | 6 | from .utils import require_torch 7 | 8 | 9 | @require_torch 10 | class AdapterFusionConfigTest(unittest.TestCase): 11 | 12 | config_names = ADAPTERFUSION_CONFIG_MAP.keys() 13 | 14 | def test_config_load(self): 15 | for config_name in self.config_names: 16 | with self.subTest(config_name=config_name): 17 | config = AdapterFusionConfig.load(config_name, temperature=True) 18 | self.assertTrue(isinstance(config, AdapterFusionConfig)) 19 | self.assertEqual(config.temperature, True) 20 | 21 | def test_config_immutable(self): 22 | def set_attr(config: AdapterFusionConfig): 23 | config.temperature = True 24 | 25 | for config in ADAPTERFUSION_CONFIG_MAP.values(): 26 | with self.subTest(config=config.__class__.__name__): 27 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config)) 28 | 29 | def test_custom_attr(self): 30 | for config in ADAPTERFUSION_CONFIG_MAP.values(): 31 | with self.subTest(config=config.__class__.__name__): 32 | config.dummy_attr = "test_value" 33 | self.assertEqual(config.dummy_attr, "test_value") 34 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb-pos/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB-pos 2 | 3 | ## What is it? 4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce positive movie reviews based the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/gpt2-imdb`) via PPO. 5 | 6 | ## Training setting 7 | The model was trained for `100` optimisation steps with a batch size of `256` which corresponds to `25600` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/04-gpt2-sentiment-ppo-training/). 8 | 9 | ## Examples 10 | A few examples of the model response to a query before and after optimisation: 11 | 12 | | query | response (before) | response (after) | rewards (before) | rewards (after) | 13 | |-------|-------------------|------------------|------------------|-----------------| 14 | |I'd never seen a |heavier, woodier example of Victorian archite... |film of this caliber, and I think it's wonder... |3.297736 |4.158653| 15 | |I love John's work |but I actually have to write language as in w... |and I hereby recommend this film. I am really... |-1.904006 |4.159198 | 16 | |I's a big struggle |to see anyone who acts in that way. by Jim Th... |, but overall I'm happy with the changes even ... 
|-1.595925 |2.651260| 17 | 18 | 19 | -------------------------------------------------------------------------------- /model_cards/twmkn9/albert-base-v2-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [ALBERT base v2](https://huggingface.co/albert-base-v2) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type albert 7 | --model_name_or_path albert-base-v2 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/albert_fine/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 78.71010200723923, 30 | 'f1': 81.89228117126069, 31 | 'total': 6078, 32 | 'HasAns_exact': 75.39518900343643, 33 | 'HasAns_f1': 82.04167868004215, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 81.7550505050505, 36 | 'NoAns_f1': 81.7550505050505, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 78.72655478775913, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 81.90873395178066, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /tests/fixtures/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . 
" The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /examples/summarization/t5/README.md: -------------------------------------------------------------------------------- 1 | ***This script evaluates the the multitask pre-trained checkpoint for ``t5-base`` (see paper [here](https://arxiv.org/pdf/1910.10683.pdf)) on the CNN/Daily Mail test dataset. Please note that the results in the paper were attained using a model fine-tuned on summarization, so that results will be worse here by approx. 0.5 ROUGE points*** 2 | 3 | ### Get the CNN Data 4 | First, you need to download the CNN data. It's about ~400 MB and can be downloaded by 5 | running 6 | 7 | ```bash 8 | python download_cnn_daily_mail.py cnn_articles_input_data.txt cnn_articles_reference_summaries.txt 9 | ``` 10 | 11 | You should confirm that each file has 11490 lines: 12 | 13 | ```bash 14 | wc -l cnn_articles_input_data.txt # should print 11490 15 | wc -l cnn_articles_reference_summaries.txt # should print 11490 16 | ``` 17 | 18 | ### Generating Summaries 19 | 20 | To create summaries for each article in dataset, run: 21 | ```bash 22 | python evaluate_cnn.py cnn_articles_input_data.txt cnn_generated_articles_summaries.txt cnn_articles_reference_summaries.txt rouge_score.txt 23 | ``` 24 | The default batch size, 8, fits in 16GB GPU memory, but may need to be adjusted to fit your system. 25 | The rouge scores "rouge1, rouge2, rougeL" are automatically created and saved in ``rouge_score.txt``. 26 | 27 | 28 | ### Finetuning 29 | Pass model_type=t5 and model `examples/summarization/bart/finetune.py` 30 | -------------------------------------------------------------------------------- /model_cards/digitalepidemiologylab/covid-twitter-bert/README.md: -------------------------------------------------------------------------------- 1 | # COVID-Twitter-BERT (CT-BERT) 2 | BERT-large-uncased model, pretrained on a corpus of messages from Twitter about COVID-19 3 | 4 | ## Overview 5 | This model was trained on 160M tweets collected between January 12 and April 16, 2020 containing at least one of the keywords "wuhan", "ncov", "coronavirus", "covid", or "sars-cov-2". 
These tweets were filtered and preprocessed to reach a final sample of 22.5M tweets (containing 40.7M sentences and 633M tokens) which were used for training. 6 | 7 | This model was evaluated based on downstream classification tasks, but it could be used for any other NLP task which can leverage contextual embeddings. 8 | 9 | In order to achieve best results, make sure to use the same text preprocessing as we did for pretraining. This involves replacing user mentions, urls and emojis. You can find a script on our projects [GitHub repo](https://github.com/digitalepidemiologylab/covid-twitter-bert). 10 | 11 | ## Example usage 12 | ```python 13 | tokenizer = AutoTokenizer.from_pretrained("digitalepidemiologylab/covid-twitter-bert") 14 | model = TFAutoModel.from_pretrained("digitalepidemiologylab/covid-twitter-bert") 15 | ``` 16 | 17 | ## References 18 | [1] Martin Müller, Marcel Salaté, Per E Kummervold. "COVID-Twitter-BERT: A Natural Language Processing Model to Analyse COVID-19 Content on Twitter" arXiv preprint arXiv:2005.07503 (2020). 19 | -------------------------------------------------------------------------------- /model_cards/twmkn9/bert-base-uncased-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [BERT base uncased](https://huggingface.co/bert-base-uncased) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type bert 7 | --model_name_or_path bert-base-uncased 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/bert_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 72.35932872655479, 30 | 'f1': 75.75355132564763, 31 | 'total': 6078, 32 | 'HasAns_exact': 74.29553264604812, 33 | 'HasAns_f1': 81.38490892002987, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 70.58080808080808, 36 | 'NoAns_f1': 70.58080808080808, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 72.35932872655479, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 75.75355132564766, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! 
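For inference, a minimal question-answering sketch (the hub id `twmkn9/bert-base-uncased-squad2` is inferred from this model card's path; the question and context are made up):

```python
from transformers import pipeline

nlp = pipeline(
    "question-answering",
    model="twmkn9/bert-base-uncased-squad2",
    tokenizer="twmkn9/bert-base-uncased-squad2",
)

result = nlp({
    "question": "What was the model fine-tuned on?",
    "context": "This checkpoint is BERT base uncased fine-tuned on SQuAD v2 for three epochs.",
})
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```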
-------------------------------------------------------------------------------- /src/transformers/commands/download.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from transformers.commands import BaseTransformersCLICommand 4 | 5 | 6 | def download_command_factory(args): 7 | return DownloadCommand(args.model, args.cache_dir, args.force) 8 | 9 | 10 | class DownloadCommand(BaseTransformersCLICommand): 11 | @staticmethod 12 | def register_subcommand(parser: ArgumentParser): 13 | download_parser = parser.add_parser("download") 14 | download_parser.add_argument( 15 | "--cache-dir", type=str, default=None, help="Path to location to store the models" 16 | ) 17 | download_parser.add_argument( 18 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir" 19 | ) 20 | download_parser.add_argument("model", type=str, help="Name of the model to download") 21 | download_parser.set_defaults(func=download_command_factory) 22 | 23 | def __init__(self, model: str, cache: str, force: bool): 24 | self._model = model 25 | self._cache = cache 26 | self._force = force 27 | 28 | def run(self): 29 | from transformers import AutoModel, AutoTokenizer 30 | 31 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 32 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 33 | -------------------------------------------------------------------------------- /model_cards/twmkn9/distilroberta-base-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [Distilroberta base](https://huggingface.co/distilroberta-base) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 run_squad.py 6 | --model_type robberta 7 | --model_name_or_path distilroberta-base 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/distilroberta_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 70.9279368213228, 30 | 'f1': 74.60439802429168, 31 | 'total': 6078, 32 | 'HasAns_exact': 67.62886597938144, 33 | 'HasAns_f1': 75.30774267754136, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 73.95833333333333, 36 | 'NoAns_f1': 73.95833333333333, 'NoAns_total': 3168, 37 | 'best_exact': 70.94438960184272, 38 | 'best_exact_thresh': 0.0, 39 | 'best_f1': 74.62085080481161, 40 | 'best_f1_thresh': 0.0 41 | } 42 | ``` 43 | 44 | We are hopeful this might save you time, energy, and compute. Cheers! 
-------------------------------------------------------------------------------- /model_cards/fmikaelian/camembert-base-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-fquad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [FQuAD](https://fquad.illuin.tech/)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type camembert \ 16 | --model_name_or_path camembert-base \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file train.json \ 21 | --predict_file valid.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 \ 29 | --save_steps 10000 30 | ``` 31 | 32 | ## Evaluation results 33 | 34 | ```shell 35 | {"f1": 77.24515316052342, "exact_match": 52.82308657465496} 36 | ``` 37 | 38 | ## Usage 39 | 40 | ```python 41 | from transformers import pipeline 42 | 43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-fquad', tokenizer='fmikaelian/camembert-base-fquad') 44 | 45 | nlp({ 46 | 'question': "Qui est Claude Monet?", 47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 48 | }) 49 | ``` -------------------------------------------------------------------------------- /model_cards/julien-c/dummy-unknown/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: 3 | - ci 4 | --- 5 | 6 | ## Dummy model used for unit testing and CI 7 | 8 | 9 | ```python 10 | import json 11 | import os 12 | from transformers.configuration_roberta import RobertaConfig 13 | from transformers import RobertaForMaskedLM, TFRobertaForMaskedLM 14 | 15 | DIRNAME = "./dummy-unknown" 16 | 17 | 18 | config = RobertaConfig(10, 20, 1, 1, 40) 19 | 20 | model = RobertaForMaskedLM(config) 21 | model.save_pretrained(DIRNAME) 22 | 23 | tf_model = TFRobertaForMaskedLM.from_pretrained(DIRNAME, from_pt=True) 24 | tf_model.save_pretrained(DIRNAME) 25 | 26 | # Tokenizer: 27 | 28 | vocab = [ 29 | "l", 30 | "o", 31 | "w", 32 | "e", 33 | "r", 34 | "s", 35 | "t", 36 | "i", 37 | "d", 38 | "n", 39 | "\u0120", 40 | "\u0120l", 41 | "\u0120n", 42 | "\u0120lo", 43 | "\u0120low", 44 | "er", 45 | "\u0120lowest", 46 | "\u0120newer", 47 | "\u0120wider", 48 | "", 49 | ] 50 | vocab_tokens = dict(zip(vocab, range(len(vocab)))) 51 | merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""] 52 | 53 | vocab_file = os.path.join(DIRNAME, "vocab.json") 54 | merges_file = os.path.join(DIRNAME, "merges.txt") 55 | with open(vocab_file, "w", encoding="utf-8") as fp: 56 | fp.write(json.dumps(vocab_tokens) + "\n") 57 | with open(merges_file, "w", encoding="utf-8") as fp: 58 | fp.write("\n".join(merges)) 59 | ``` 60 | -------------------------------------------------------------------------------- /model_cards/twmkn9/distilbert-base-uncased-squad2/README.md: -------------------------------------------------------------------------------- 1 | This model is [Distilbert base uncased](https://huggingface.co/distilbert-base-uncased) trained on SQuAD v2 as: 2 | 3 | ``` 4 | export SQUAD_DIR=../../squad2 5 | python3 
run_squad.py 6 | --model_type distilbert 7 | --model_name_or_path distilbert-base-uncased 8 | --do_train 9 | --do_eval 10 | --overwrite_cache 11 | --do_lower_case 12 | --version_2_with_negative 13 | --save_steps 100000 14 | --train_file $SQUAD_DIR/train-v2.0.json 15 | --predict_file $SQUAD_DIR/dev-v2.0.json 16 | --per_gpu_train_batch_size 8 17 | --num_train_epochs 3 18 | --learning_rate 3e-5 19 | --max_seq_length 384 20 | --doc_stride 128 21 | --output_dir ./tmp/distilbert_fine_tuned/ 22 | ``` 23 | 24 | Performance on a dev subset is close to the original paper: 25 | 26 | ``` 27 | Results: 28 | { 29 | 'exact': 64.88976637051661, 30 | 'f1': 68.1776176526635, 31 | 'total': 6078, 32 | 'HasAns_exact': 69.7594501718213, 33 | 'HasAns_f1': 76.62665295288285, 34 | 'HasAns_total': 2910, 35 | 'NoAns_exact': 60.416666666666664, 36 | 'NoAns_f1': 60.416666666666664, 37 | 'NoAns_total': 3168, 38 | 'best_exact': 64.88976637051661, 39 | 'best_exact_thresh': 0.0, 40 | 'best_f1': 68.17761765266337, 41 | 'best_f1_thresh': 0.0 42 | } 43 | ``` 44 | 45 | We are hopeful this might save you time, energy, and compute. Cheers! -------------------------------------------------------------------------------- /docs/source/bertology.rst: -------------------------------------------------------------------------------- 1 | BERTology 2 | --------- 3 | 4 | There is a growing field of study concerned with investigating the inner working of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are: 5 | 6 | 7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950 8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650 9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341 10 | 11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650): 12 | 13 | 14 | * accessing all the hidden-states of BERT/GPT/GPT-2, 15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2, 16 | * retrieving heads output values and gradients to be able to compute head importance score and prune head as explained in https://arxiv.org/abs/1905.10650. 17 | 18 | To help you understand and use these features, we have added a specific example script: `bertology.py `_ while extract information and prune a model pre-trained on GLUE. 19 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-DK_rest/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_rest` is trained from 1G (19 types) restaurants from Yelp. 6 | 7 | ## Model Description 8 | 9 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 10 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 
11 | 12 | 13 | ## Instructions 14 | Loading the post-trained weights are as simple as, e.g., 15 | 16 | ```python 17 | import torch 18 | from transformers import AutoModel, AutoTokenizer 19 | 20 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_rest") 21 | model = AutoModel.from_pretrained("activebus/BERT-DK_rest") 22 | 23 | ``` 24 | 25 | 26 | ## Evaluation Results 27 | 28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 29 | 30 | 31 | ## Citation 32 | If you find this work useful, please cite as following. 33 | ``` 34 | @inproceedings{xu_bert2019, 35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 38 | month = "jun", 39 | year = "2019", 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /examples/token-classification/run.sh: -------------------------------------------------------------------------------- 1 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ 2 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 3 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ 4 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 5 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ 6 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 7 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py" 8 | export MAX_LENGTH=128 9 | export BERT_MODEL=bert-base-multilingual-cased 10 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 11 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 12 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 13 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 14 | export OUTPUT_DIR=germeval-model 15 | export BATCH_SIZE=32 16 | export NUM_EPOCHS=3 17 | export SAVE_STEPS=750 18 | export SEED=1 19 | 20 | python3 run_ner.py \ 21 | --data_dir . \ 22 | --labels ./labels.txt \ 23 | --model_name_or_path $BERT_MODEL \ 24 | --output_dir $OUTPUT_DIR \ 25 | --max_seq_length $MAX_LENGTH \ 26 | --num_train_epochs $NUM_EPOCHS \ 27 | --per_gpu_train_batch_size $BATCH_SIZE \ 28 | --save_steps $SAVE_STEPS \ 29 | --seed $SEED \ 30 | --do_train \ 31 | --do_eval \ 32 | --do_predict 33 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-PT_rest/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_rest` is trained from 1G (19 types) restaurants from Yelp. 6 | `BERT-PT_*` addtionally uses SQuAD 1.1. 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 
12 | 13 | 14 | ## Instructions 15 | Loading the post-trained weights are as simple as, e.g., 16 | 17 | ```python 18 | import torch 19 | from transformers import AutoModel, AutoTokenizer 20 | 21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_rest") 22 | model = AutoModel.from_pretrained("activebus/BERT-PT_rest") 23 | 24 | ``` 25 | 26 | 27 | ## Evaluation Results 28 | 29 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 30 | 31 | 32 | ## Citation 33 | If you find this work useful, please cite as following. 34 | ``` 35 | @inproceedings{xu_bert2019, 36 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 37 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 38 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 39 | month = "jun", 40 | year = "2019", 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/summarization/t5/test_t5_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | 8 | from .evaluate_cnn import run_generate 9 | 10 | 11 | output_file_name = "output_t5_sum.txt" 12 | score_file_name = "score_t5_sum.txt" 13 | 14 | articles = ["New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County."] 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | logger = logging.getLogger() 19 | 20 | 21 | class TestT5Examples(unittest.TestCase): 22 | def test_t5_cli(self): 23 | stream_handler = logging.StreamHandler(sys.stdout) 24 | logger.addHandler(stream_handler) 25 | tmp = Path(tempfile.gettempdir()) / "utest_generations_t5_sum.hypo" 26 | with tmp.open("w") as f: 27 | f.write("\n".join(articles)) 28 | 29 | output_file_name = Path(tempfile.gettempdir()) / "utest_output_t5_sum.hypo" 30 | score_file_name = Path(tempfile.gettempdir()) / "utest_score_t5_sum.hypo" 31 | 32 | testargs = [ 33 | "evaluate_cnn.py", 34 | "patrickvonplaten/t5-tiny-random", 35 | str(tmp), 36 | str(output_file_name), 37 | str(tmp), 38 | str(score_file_name), 39 | ] 40 | 41 | with patch.object(sys, "argv", testargs): 42 | run_generate() 43 | self.assertTrue(Path(output_file_name).exists()) 44 | self.assertTrue(Path(score_file_name).exists()) 45 | -------------------------------------------------------------------------------- /tests/test_adapter_config.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import FrozenInstanceError 3 | 4 | from transformers import ADAPTER_CONFIG_MAP, AdapterConfig 5 | 6 | from .utils import require_torch 7 | 8 | 9 | @require_torch 10 | class AdapterConfigTest(unittest.TestCase): 11 | 12 | config_names = ["pfeiffer", "houlsby"] 13 | 14 | def test_config_load(self): 15 | download_kwargs = {"force_download": True} 16 | for config_name in self.config_names: 17 | with self.subTest(config_name=config_name): 18 | config = AdapterConfig.load(config_name, download_kwargs=download_kwargs, non_linearity="leakyrelu") 19 | self.assertTrue(isinstance(config, AdapterConfig)) 20 | self.assertEqual(config.non_linearity, "leakyrelu") 21 | 22 | def test_config_immutable(self): 23 | def set_attr(config: AdapterConfig): 24 | config.ln_before = True 25 | 26 | for config in 
ADAPTER_CONFIG_MAP.values(): 27 | with self.subTest(config=config.__class__.__name__): 28 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config)) 29 | 30 | def test_custom_attr(self): 31 | for config in ADAPTER_CONFIG_MAP.values(): 32 | with self.subTest(config=config.__class__.__name__): 33 | # create a copy to leave original untouched 34 | config = config.replace() 35 | config.dummy_attr = "test_value" 36 | self.assertEqual(config.dummy_attr, "test_value") 37 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-PT_laptop/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 6 | `BERT-PT_*` addtionally uses SQuAD 1.1. 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 12 | 13 | 14 | ## Instructions 15 | Loading the post-trained weights are as simple as, e.g., 16 | 17 | ```python 18 | import torch 19 | from transformers import AutoModel, AutoTokenizer 20 | 21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_laptop") 22 | model = AutoModel.from_pretrained("activebus/BERT-PT_laptop") 23 | 24 | ``` 25 | 26 | ## Evaluation Results 27 | 28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 29 | 30 | 31 | ## Citation 32 | If you find this work useful, please cite as following. 
33 | ``` 34 | @inproceedings{xu_bert2019, 35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 38 | month = "jun", 39 | year = "2019", 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /model_cards/fmikaelian/camembert-base-squad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-base-squad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [french-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type camembert \ 16 | --model_name_or_path camembert-base \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \ 21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output3 \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 \ 29 | --save_steps 10000 30 | ``` 31 | 32 | ## Evaluation results 33 | 34 | ```shell 35 | {"f1": 79.8570684959745, "exact_match": 59.21327108373895} 36 | ``` 37 | 38 | ## Usage 39 | 40 | ```python 41 | from transformers import pipeline 42 | 43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-squad', tokenizer='fmikaelian/camembert-base-squad') 44 | 45 | nlp({ 46 | 'question': "Qui est Claude Monet?", 47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 48 | }) 49 | ``` -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-base-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Base Generator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-generator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-generator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. 
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForMaskedLM 34 | 35 | ```python 36 | from transformers import pipeline 37 | 38 | fill_mask = pipeline( 39 | "fill-mask", 40 | model="monologg/koelectra-base-generator", 41 | tokenizer="monologg/koelectra-base-generator" 42 | ) 43 | 44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token))) 45 | ``` 46 | -------------------------------------------------------------------------------- /model_cards/julien-c/EsperBERTo-small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: esperanto 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png 4 | --- 5 | 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto 7 | 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥 9 | 10 | ## Training Details 11 | 12 | - current checkpoint: 566000 13 | - machine name: `galinette` 14 | 15 | 16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png) 17 | 18 | ## Example pipeline 19 | 20 | ```python 21 | from transformers import pipeline 22 | 23 | fill_mask = pipeline( 24 | "fill-mask", 25 | model="julien-c/EsperBERTo-small", 26 | tokenizer="julien-c/EsperBERTo-small" 27 | ) 28 | 29 | fill_mask("Jen la komenco de bela .") 30 | 31 | # This is the beginning of a beautiful . 32 | # => 33 | 34 | # { 35 | # 'score':0.06502299010753632 36 | # 'sequence':' Jen la komenco de bela vivo.' 37 | # 'token':1099 38 | # } 39 | # { 40 | # 'score':0.0421181358397007 41 | # 'sequence':' Jen la komenco de bela vespero.' 42 | # 'token':5100 43 | # } 44 | # { 45 | # 'score':0.024884626269340515 46 | # 'sequence':' Jen la komenco de bela laboro.' 47 | # 'token':1570 48 | # } 49 | # { 50 | # 'score':0.02324388362467289 51 | # 'sequence':' Jen la komenco de bela tago.' 52 | # 'token':1688 53 | # } 54 | # { 55 | # 'score':0.020378097891807556 56 | # 'sequence':' Jen la komenco de bela festo.' 57 | # 'token':4580 58 | # } 59 | ``` 60 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-small-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Small Generator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-generator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-generator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. 
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForMaskedLM 34 | 35 | ```python 36 | from transformers import pipeline 37 | 38 | fill_mask = pipeline( 39 | "fill-mask", 40 | model="monologg/koelectra-small-generator", 41 | tokenizer="monologg/koelectra-small-generator" 42 | ) 43 | 44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token))) 45 | ``` 46 | -------------------------------------------------------------------------------- /model_cards/fmikaelian/flaubert-base-uncased-squad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # flaubert-base-uncased-squad 6 | 7 | ## Description 8 | 9 | A baseline model for question-answering in french ([flaubert](https://github.com/getalp/Flaubert) model fine-tuned on [french-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD)) 10 | 11 | ## Training hyperparameters 12 | 13 | ```shell 14 | python3 ./examples/question-answering/run_squad.py \ 15 | --model_type flaubert \ 16 | --model_name_or_path flaubert-base-uncased \ 17 | --do_train \ 18 | --do_eval \ 19 | --do_lower_case \ 20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \ 21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \ 22 | --learning_rate 3e-5 \ 23 | --num_train_epochs 2 \ 24 | --max_seq_length 384 \ 25 | --doc_stride 128 \ 26 | --output_dir output \ 27 | --per_gpu_eval_batch_size=3 \ 28 | --per_gpu_train_batch_size=3 29 | ``` 30 | 31 | ## Evaluation results 32 | 33 | ```shell 34 | {"f1": 68.66174806561969, "exact_match": 49.299692063176714} 35 | ``` 36 | 37 | ## Usage 38 | 39 | ```python 40 | from transformers import pipeline 41 | 42 | nlp = pipeline('question-answering', model='fmikaelian/flaubert-base-uncased-squad', tokenizer='fmikaelian/flaubert-base-uncased-squad') 43 | 44 | nlp({ 45 | 'question': "Qui est Claude Monet?", 46 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 47 | }) 48 | ``` -------------------------------------------------------------------------------- /model_cards/ixa-ehu/berteus-base-cased/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - basque 4 | --- 5 | 6 | # BERTeus base cased 7 | 8 | This is the Basque language pretrained model presented in [Give your Text Representation Models some Love: the Case for Basque](https://arxiv.org/pdf/2004.00033.pdf). This model has been trained on a Basque corpus comprising Basque crawled news articles from online newspapers and the Basque Wikipedia. The training corpus contains 224.6 million tokens, of which 35 million come from the Wikipedia. 9 | 10 | BERTeus has been tested on four different downstream tasks for Basque: part-of-speech (POS) tagging, named entity recognition (NER), sentiment analysis and topic classification; improving the state of the art for all tasks. 
See summary of results below: 11 | 12 | 13 | | Downstream task | BERTeus | mBERT | Previous SOTA | 14 | | --------------- | ------- | ------| ------------- | 15 | | Topic Classification | **76.77** | 68.42 | 63.00 | 16 | | Sentiment | **78.10** | 71.02 | 74.02 | 17 | | POS | **97.76** | 96.37 | 96.10 | 18 | | NER | **87.06** | 81.52 | 76.72 | 19 | 20 | 21 | If using this model, please cite the following paper: 22 | ``` 23 | @inproceedings{agerri2020give, 24 | title={Give your Text Representation Models some Love: the Case for Basque}, 25 | author={Rodrigo Agerri and I{\~n}aki San Vicente and Jon Ander Campos and Ander Barrena and Xabier Saralegi and Aitor Soroa and Eneko Agirre}, 26 | booktitle={Proceedings of the 12th International Conference on Language Resources and Evaluation}, 27 | year={2020} 28 | } 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/adversarial/README.md: -------------------------------------------------------------------------------- 1 | ## Adversarial evaluation of model performances 2 | 3 | Here is an example on evaluating a model using adversarial evaluation of natural language inference with the Heuristic Analysis for NLI Systems (HANS) dataset [McCoy et al., 2019](https://arxiv.org/abs/1902.01007). The example was gracefully provided by [Nafise Sadat Moosavi](https://github.com/ns-moosavi). 4 | 5 | The HANS dataset can be downloaded from [this location](https://github.com/tommccoy1/hans). 6 | 7 | This is an example of using test_hans.py: 8 | 9 | ```bash 10 | export HANS_DIR=path-to-hans 11 | export MODEL_TYPE=type-of-the-model-e.g.-bert-roberta-xlnet-etc 12 | export MODEL_PATH=path-to-the-model-directory-that-is-trained-on-NLI-e.g.-by-using-run_glue.py 13 | 14 | python examples/hans/test_hans.py \ 15 | --task_name hans \ 16 | --model_type $MODEL_TYPE \ 17 | --do_eval \ 18 | --data_dir $HANS_DIR \ 19 | --model_name_or_path $MODEL_PATH \ 20 | --max_seq_length 128 \ 21 | --output_dir $MODEL_PATH \ 22 | ``` 23 | 24 | This will create the hans_predictions.txt file in MODEL_PATH, which can then be evaluated using hans/evaluate_heur_output.py from the HANS dataset. 25 | 26 | The results of the BERT-base model that is trained on MNLI using batch size 8 and the random seed 42 on the HANS dataset is as follows: 27 | 28 | ```bash 29 | Heuristic entailed results: 30 | lexical_overlap: 0.9702 31 | subsequence: 0.9942 32 | constituent: 0.9962 33 | 34 | Heuristic non-entailed results: 35 | lexical_overlap: 0.199 36 | subsequence: 0.0396 37 | constituent: 0.118 38 | ``` 39 | -------------------------------------------------------------------------------- /model_cards/activebus/BERT-DK_laptop/README.md: -------------------------------------------------------------------------------- 1 | # ReviewBERT 2 | 3 | BERT (post-)trained from review corpus to understand sentiment, options and various e-commence aspects. 4 | 5 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 6 | 7 | 8 | ## Model Description 9 | 10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus. 11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/). 12 | 13 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`. 
14 | 15 | ## Instructions 16 | Loading the post-trained weights are as simple as, e.g., 17 | 18 | ```python 19 | import torch 20 | from transformers import AutoModel, AutoTokenizer 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_laptop") 23 | model = AutoModel.from_pretrained("activebus/BERT-DK_laptop") 24 | 25 | ``` 26 | 27 | 28 | ## Evaluation Results 29 | 30 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf) 31 | 32 | 33 | ## Citation 34 | If you find this work useful, please cite as following. 35 | ``` 36 | @inproceedings{xu_bert2019, 37 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis", 38 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.", 39 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics", 40 | month = "jun", 41 | year = "2019", 42 | } 43 | ``` 44 | -------------------------------------------------------------------------------- /adapter_docs/classes/roberta.rst: -------------------------------------------------------------------------------- 1 | RoBERTa 2 | ======== 3 | 4 | The RoBERTa model was proposed in `RoBERTa: A Robustly Optimized BERT Pretraining Approach `_ 5 | by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, 6 | Veselin Stoyanov. It is based on Google's BERT model released in 2018. 7 | 8 | .. note:: 9 | This class is nearly identical to the PyTorch implementation of RoBERTa in Huggingface Transformers. 10 | For more information, visit `the corresponding section in their documentation `_. 11 | 12 | RobertaConfig 13 | ~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.RobertaConfig 16 | :members: 17 | 18 | 19 | RobertaTokenizer 20 | ~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.RobertaTokenizer 23 | :members: build_inputs_with_special_tokens, get_special_tokens_mask, 24 | create_token_type_ids_from_sequences, save_vocabulary 25 | 26 | 27 | RobertaModel 28 | ~~~~~~~~~~~~~~~~~~~~ 29 | 30 | .. autoclass:: transformers.RobertaModel 31 | :members: 32 | 33 | 34 | RobertaForMaskedLM 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | .. autoclass:: transformers.RobertaForMaskedLM 38 | :members: 39 | 40 | 41 | RobertaForSequenceClassification 42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | .. autoclass:: transformers.RobertaForSequenceClassification 45 | :members: 46 | 47 | 48 | RobertaForTokenClassification 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | .. 
autoclass:: transformers.RobertaForTokenClassification 52 | :members: 53 | -------------------------------------------------------------------------------- /model_cards/ahotrod/roberta_large_squad2/README.md: -------------------------------------------------------------------------------- 1 | ## RoBERTa-large language model fine-tuned on SQuAD2.0 2 | 3 | ### with the following results: 4 | 5 | ``` 6 | "exact": 84.46896319380106, 7 | "f1": 87.85388093408943, 8 | "total": 11873, 9 | "HasAns_exact": 81.37651821862349, 10 | "HasAns_f1": 88.1560607844881, 11 | "HasAns_total": 5928, 12 | "NoAns_exact": 87.55256518082422, 13 | "NoAns_f1": 87.55256518082422, 14 | "NoAns_total": 5945, 15 | "best_exact": 84.46896319380106, 16 | "best_exact_thresh": 0.0, 17 | "best_f1": 87.85388093408929, 18 | "best_f1_thresh": 0.0 19 | ``` 20 | ### from script: 21 | ``` 22 | python ${EXAMPLES}/run_squad.py \ 23 | --model_type roberta \ 24 | --model_name_or_path roberta-large \ 25 | --do_train \ 26 | --do_eval \ 27 | --train_file ${SQUAD}/train-v2.0.json \ 28 | --predict_file ${SQUAD}/dev-v2.0.json \ 29 | --version_2_with_negative \ 30 | --do_lower_case \ 31 | --num_train_epochs 3 \ 32 | --warmup_steps 1642 \ 33 | --weight_decay 0.01 \ 34 | --learning_rate 3e-5 \ 35 | --adam_epsilon 1e-6 \ 36 | --max_seq_length 512 \ 37 | --doc_stride 128 \ 38 | --per_gpu_train_batch_size 8 \ 39 | --gradient_accumulation_steps 6 \ 40 | --per_gpu_eval_batch_size 48 \ 41 | --threads 12 \ 42 | --logging_steps 50 \ 43 | --save_steps 2000 \ 44 | --overwrite_output_dir \ 45 | --output_dir ${MODEL_PATH} 46 | $@ 47 | ``` 48 | ### using the following system & software: 49 | ``` 50 | Transformers: 2.7.0 51 | PyTorch: 1.4.0 52 | TensorFlow: 2.1.0 53 | Python: 3.7.7 54 | OS/Platform: Linux-5.3.0-46-generic-x86_64-with-debian-buster-sid 55 | CPU/GPU: Intel i9-9900K / NVIDIA Titan RTX 24GB 56 | ``` 57 | -------------------------------------------------------------------------------- /model_cards/illuin/camembert-large-fquad/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: french 3 | --- 4 | 5 | # camembert-large-fquad 6 | 7 | ## Description 8 | 9 | A native French Question Answering model [CamemBERT-large](https://camembert-model.fr/) fine-tuned on [FQuAD](https://fquad.illuin.tech/). 10 | 11 | ## FQuAD Leaderboard and evaluation scores 12 | 13 | The results of Camembert-large-fquad can be compared with other state-of-the-art models of the [FQuAD Leaderboard](https://illuin-tech.github.io/FQuAD-explorer/). 14 | 15 | On the test set the model scores, 16 | 17 | ```shell 18 | {"f1": 91.5, "exact_match": 82.0} 19 | ``` 20 | 21 | On the development set the model scores, 22 | 23 | ```shell 24 | {"f1": 91.0, "exact_match": 81.2} 25 | ``` 26 | 27 | Note : You can also explore the results of the model on [FQuAD-Explorer](https://illuin-tech.github.io/FQuAD-explorer/) ! 28 | 29 | ## Usage 30 | 31 | ```python 32 | from transformers import pipeline 33 | 34 | nlp = pipeline('question-answering', model='illuin/camembert-large-fquad', tokenizer='illuin/camembert-large-fquad') 35 | 36 | nlp({ 37 | 'question': "Qui est Claude Monet?", 38 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme." 
39 | }) 40 | ``` 41 | 42 | ## Citation 43 | 44 | If you use our work, please cite: 45 | 46 | ```bibtex 47 | @article{dHoffschmidt2020FQuADFQ, 48 | title={FQuAD: French Question Answering Dataset}, 49 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl'e and Quentin Heinrich}, 50 | journal={ArXiv}, 51 | year={2020}, 52 | volume={abs/2002.06071} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /src/transformers/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", 28 | "umberto-commoncrawl-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-commoncrawl-cased-v1/config.json", 29 | "umberto-wikipedia-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-wikipedia-uncased-v1/config.json", 30 | } 31 | 32 | 33 | class CamembertConfig(RobertaConfig): 34 | """ 35 | This class overrides :class:`~transformers.RobertaConfig`. Please check the 36 | superclass for the appropriate documentation alongside usage examples. 37 | """ 38 | 39 | model_type = "camembert" 40 | -------------------------------------------------------------------------------- /src/transformers/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ MMBT configuration """ 17 | 18 | 19 | import logging 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MMBTConfig(object): 26 | """Configuration class to store the configuration of a `MMBT Model`. 27 | 28 | Args: 29 | config (:obj:`~transformers.PreTrainedConfig`): 30 | Config of the underlying Transformer models. 
Its values are 31 | copied over to use a single config. 32 | num_labels (:obj:`int` or :obj:`None`, optional, defaults to `None`): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (:obj:`int`, optional, defautls to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve adapter-transformers 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🐛 Bug 11 | 12 | ## Information 13 | 14 | Model I am using (Bert, XLNet ...): 15 | 16 | Language I am using the model on (English, Chinese ...): 17 | 18 | Adapter setup I am using (if any): 19 | 20 | The problem arises when using: 21 | * [ ] the official example scripts: (give details below) 22 | * [ ] my own modified scripts: (give details below) 23 | 24 | The tasks I am working on is: 25 | * [ ] an official GLUE/SQUaD task: (give the name) 26 | * [ ] my own task or dataset: (give details below) 27 | 28 | ## To reproduce 29 | 30 | Steps to reproduce the behavior: 31 | 32 | 1. 33 | 2. 34 | 3. 35 | 36 | 39 | 40 | ## Expected behavior 41 | 42 | 43 | 44 | ## Environment info 45 | 47 | 48 | - `transformers` version: 49 | - Platform: 50 | - Python version: 51 | - PyTorch version (GPU?): 52 | - Tensorflow version (GPU?): 53 | - Using GPU in script?: 54 | - Using distributed or parallel set-up in script?: 55 | -------------------------------------------------------------------------------- /examples/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | ## Multiple Choice 2 | 3 | Based on the script [`run_multiple_choice.py`](). 
4 | 5 | #### Fine-tuning on SWAG 6 | Download [swag](https://github.com/rowanz/swagaf/tree/master/data) data 7 | 8 | ```bash 9 | #training on 4 tesla V100(16GB) GPUS 10 | export SWAG_DIR=/path/to/swag_data_dir 11 | python ./examples/multiple-choice/run_multiple_choice.py \ 12 | --task_name swag \ 13 | --model_name_or_path roberta-base \ 14 | --do_train \ 15 | --do_eval \ 16 | --data_dir $SWAG_DIR \ 17 | --learning_rate 5e-5 \ 18 | --num_train_epochs 3 \ 19 | --max_seq_length 80 \ 20 | --output_dir models_bert/swag_base \ 21 | --per_gpu_eval_batch_size=16 \ 22 | --per_device_train_batch_size=16 \ 23 | --gradient_accumulation_steps 2 \ 24 | --overwrite_output 25 | ``` 26 | Training with the defined hyper-parameters yields the following results: 27 | ``` 28 | ***** Eval results ***** 29 | eval_acc = 0.8338998300509847 30 | eval_loss = 0.44457291918821606 31 | ``` 32 | 33 | 34 | ## Tensorflow 35 | 36 | ```bash 37 | export SWAG_DIR=/path/to/swag_data_dir 38 | python ./examples/multiple-choice/run_tf_multiple_choice.py \ 39 | --task_name swag \ 40 | --model_name_or_path bert-base-cased \ 41 | --do_train \ 42 | --do_eval \ 43 | --data_dir $SWAG_DIR \ 44 | --learning_rate 5e-5 \ 45 | --num_train_epochs 3 \ 46 | --max_seq_length 80 \ 47 | --output_dir models_bert/swag_base \ 48 | --per_gpu_eval_batch_size=16 \ 49 | --per_device_train_batch_size=16 \ 50 | --logging-dir logs \ 51 | --gradient_accumulation_steps 2 \ 52 | --overwrite_output 53 | ``` 54 | 55 | # Run it in colab 56 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb) 57 | -------------------------------------------------------------------------------- /tests/test_tokenization_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import unittest 18 | 19 | from transformers import PreTrainedTokenizer 20 | from transformers.tokenization_gpt2 import GPT2Tokenizer 21 | 22 | from .utils import slow 23 | 24 | 25 | class TokenizerUtilsTest(unittest.TestCase): 26 | def check_tokenizer_from_pretrained(self, tokenizer_class): 27 | s3_models = list(tokenizer_class.max_model_input_sizes.keys()) 28 | for model_name in s3_models[:1]: 29 | tokenizer = tokenizer_class.from_pretrained(model_name) 30 | self.assertIsNotNone(tokenizer) 31 | self.assertIsInstance(tokenizer, tokenizer_class) 32 | self.assertIsInstance(tokenizer, PreTrainedTokenizer) 33 | 34 | for special_tok in tokenizer.all_special_tokens: 35 | self.assertIsInstance(special_tok, str) 36 | special_tok_id = tokenizer.convert_tokens_to_ids(special_tok) 37 | self.assertIsInstance(special_tok_id, int) 38 | 39 | @slow 40 | def test_pretrained_tokenizers(self): 41 | self.check_tokenizer_from_pretrained(GPT2Tokenizer) 42 | -------------------------------------------------------------------------------- /model_cards/valhalla/t5-base-squad/README.md: -------------------------------------------------------------------------------- 1 | # T5 for question-answering 2 | This is T5-base model fine-tuned on SQuAD1.1 for QA using text-to-text approach 3 | 4 | ## Model training 5 | This model was trained on colab TPU with 35GB RAM for 4 epochs 6 | 7 | ## Results: 8 | | Metric | #Value | 9 | |-------------|---------| 10 | | Exact Match | 81.5610 | 11 | | F1 | 89.9601 | 12 | 13 | ## Model in Action 🚀 14 | ``` 15 | from transformers import AutoModelWithLMHead, AutoTokenizer 16 | 17 | tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-squad") 18 | model = AutoModelWithLMHead.from_pretrained("valhalla/t5-base-squad") 19 | 20 | def get_answer(question, context): 21 | input_text = "question: %s context: %s " % (question, context) 22 | features = tokenizer.batch_encode_plus([input_text], return_tensors='pt') 23 | 24 | out = model.generate(input_ids=features['input_ids'], 25 | attention_mask=features['attention_mask']) 26 | 27 | return tokenizer.decode(out[0]) 28 | 29 | context = "In Norse mythology, Valhalla is a majestic, enormous hall located in Asgard, ruled over by the god Odin." 30 | question = "What is Valhalla ?" 31 | 32 | get_answer(question, context) 33 | # output: 'a majestic, enormous hall located in Asgard, ruled over by the god Odin' 34 | ``` 35 | Play with this model [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1a5xpJiUjZybfU9Mi-aDkOp116PZ9-wni?usp=sharing) 36 | 37 | > Created by Suraj Patil [![Github icon](https://cdn0.iconfinder.com/data/icons/octicons/1024/mark-github-32.png)](https://github.com/patil-suraj/) 38 | [![Twitter icon](https://cdn0.iconfinder.com/data/icons/shift-logotypes/32/Twitter-32.png)](https://twitter.com/psuraj28) 39 | -------------------------------------------------------------------------------- /model_cards/Tereveni-AI/gpt2-124M-uk-fiction/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: ukrainian 3 | --- 4 | 5 | Note: **default code snippet above won't work** because we are using `AlbertTokenizer` with `GPT2LMHeadModel`, see [issue](https://github.com/huggingface/transformers/issues/4285). 6 | 7 | ## GPT2 124M Trained on Ukranian Fiction 8 | 9 | ### Training details 10 | 11 | Model was trained on corpus of 4040 fiction books, 2.77 GiB in total. 
12 | Evaluation on [brown-uk](https://github.com/brown-uk/corpus) gives perplexity of 50.16. 13 | 14 | ### Example usage: 15 | ```python 16 | from transformers import AlbertTokenizer, GPT2LMHeadModel 17 | 18 | tokenizer = AlbertTokenizer.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction") 19 | model = GPT2LMHeadModel.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction") 20 | 21 | input_ids = tokenizer.encode("Но зла Юнона, суча дочка,", add_special_tokens=False, return_tensors='pt') 22 | 23 | outputs = model.generate( 24 | input_ids, 25 | do_sample=True, 26 | num_return_sequences=3, 27 | max_length=50 28 | ) 29 | 30 | for i, out in enumerate(outputs): 31 | print("{}: {}".format(i, tokenizer.decode(out))) 32 | ``` 33 | 34 | Prints something like this: 35 | ```bash 36 | 0: Но зла Юнона, суча дочка, яка затьмарила всі її таємниці: І хто з'їсть її душу, той помре». І, не дочекавшись гніву богів, посунула в пітьму, щоб не бачити перед собою. Але, за 37 | 1: Но зла Юнона, суча дочка, і довела мене до божевілля. Але він не знав нічого. Після того як я його побачив, мені стало зле. Я втратив рівновагу. Але в мене не було часу на роздуми. Я вже втратив надію 38 | 2: Но зла Юнона, суча дочка, не нарікала нам! — раптом вигукнула Юнона. — Це ти, старий йолопе! — мовила вона, не перестаючи сміятись. — Хіба ти не знаєш, що мені подобається ходити з тобою? 39 | ``` -------------------------------------------------------------------------------- /src/transformers/activations.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def swish(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | def _gelu_python(x): 16 | """ Original Implementation of the gelu activation function in Google Bert repo when initially created. 17 | For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 18 | 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 19 | This is now written in C in torch.nn.functional 20 | Also see https://arxiv.org/abs/1606.08415 21 | """ 22 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 23 | 24 | 25 | def gelu_new(x): 26 | """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). 
27 | Also see https://arxiv.org/abs/1606.08415 28 | """ 29 | return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) 30 | 31 | 32 | if torch.__version__ < "1.4.0": 33 | gelu = _gelu_python 34 | else: 35 | gelu = F.gelu 36 | 37 | 38 | def gelu_fast(x): 39 | return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x))) 40 | 41 | 42 | ACT2FN = { 43 | "relu": F.relu, 44 | "swish": swish, 45 | "gelu": gelu, 46 | "tanh": torch.tanh, 47 | "gelu_new": gelu_new, 48 | "gelu_fast": gelu_fast, 49 | } 50 | 51 | 52 | def get_activation(activation_string): 53 | if activation_string in ACT2FN: 54 | return ACT2FN[activation_string] 55 | else: 56 | raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) 57 | -------------------------------------------------------------------------------- /.github/workflows/tests_torch.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ 'master' ] 6 | paths: 7 | - 'src/**' 8 | - 'examples/**' 9 | - 'templates/**' 10 | - 'tests/**' 11 | - 'utils/**' 12 | pull_request: 13 | branches: [ 'master' ] 14 | paths: 15 | - 'src/**' 16 | - 'examples/**' 17 | - 'templates/**' 18 | - 'tests/**' 19 | - 'utils/**' 20 | 21 | jobs: 22 | check_code_quality: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v2 26 | - uses: actions/setup-python@v2 27 | with: 28 | python-version: 3.6 29 | - uses: actions/cache@v2 30 | with: 31 | path: ~/.cache/pip 32 | key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} 33 | restore-keys: | 34 | ${{ runner.os }}-pip- 35 | - name: Install 36 | run: | 37 | pip install .[torch,quality] 38 | pip uninstall isort -y 39 | pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort 40 | - name: Check Quality 41 | run: | 42 | make quality 43 | run_reduced_tests_torch: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - uses: actions/setup-python@v2 48 | with: 49 | python-version: 3.6 50 | - uses: actions/cache@v2 51 | with: 52 | path: ~/.cache/pip 53 | key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} 54 | restore-keys: | 55 | ${{ runner.os }}-pip- 56 | - name: Install 57 | run: | 58 | pip install .[sklearn,torch,testing] 59 | - name: Test 60 | run: | 61 | make test-reduced 62 | -------------------------------------------------------------------------------- /model_cards/lvwerra/gpt2-imdb-ctrl/README.md: -------------------------------------------------------------------------------- 1 | # GPT2-IMDB-ctrl 2 | 3 | ## What is it? 4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce controlled movie reviews based the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/bert-imdb`) via PPO. 5 | 6 | ## Training setting 7 | The model was trained for `200` optimisation steps with a batch size of `256` which corresponds to `51200` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/05-gpt2-sentiment-ppo-training/). The strings `"[negative]"`, `"[neutral]"`, and `"[positive]"` are added at the beginning of the query to control the sentiment. 
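## Usage

A minimal generation sketch (not part of the original card): it assumes the checkpoint loads with the standard GPT-2 classes and that one of the control strings described above is simply prepended to the prompt.

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-imdb-ctrl")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-imdb-ctrl")

# Prepend a control string ("[negative]", "[neutral]" or "[positive]") to steer the sentiment.
query = "[positive] I watched this movie when"
input_ids = tokenizer.encode(query, return_tensors="pt")

output = model.generate(input_ids, max_length=40, do_sample=True, top_k=50)
print(tokenizer.decode(output[0]))
```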
8 | 9 | ## Examples 10 | A few examples of the model response to a query before and after optimisation: 11 | 12 | | query | response [negative] | rewards [negative] | response [neutral] | rewards [neutral] | response [positive] | rewards [positive] | 13 | |-------|---------------------|--------------------|--------------------|-------------------|---------------------|--------------------| 14 | |I watched this movie when|it was released and was awful. Little bit of ...|3.130034|it was released and it was the first movie I ...|-1.351991|I was younger it was wonderful. The new play ...|4.232218| 15 | |I can remember seeing this|movie in 2008, and I was so disappointed...yo...|3.428725|in support groups, which I think was not as i...|0.213288|movie, and it is one of my favorite movies ev...|4.168838| 16 | |This 1970 hit film has|little resonance. This movie is bad, not only...|4.241872|a bit of Rocket power.783287. It can be easil...|0.849278|the best formula for comedy and is't just jus...|4.208804| 17 | 18 | 19 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_base 7 | 8 | This a albert_chinese_base model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_base model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_base 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_base' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.36333346366882324` 45 | -------------------------------------------------------------------------------- /docs/source/model_doc/auto.rst: -------------------------------------------------------------------------------- 1 | AutoModels 2 | ----------- 3 | 4 | In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you are supplying to the ``from_pretrained`` method. 
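For instance, the following short sketch (illustrative only) resolves the configuration, tokenizer and model classes from a checkpoint name:

.. code-block:: python

    from transformers import AutoConfig, AutoTokenizer, AutoModel

    # The BERT architecture is inferred from the checkpoint name.
    config = AutoConfig.from_pretrained("bert-base-cased")
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    model = AutoModel.from_pretrained("bert-base-cased")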
5 | 6 | AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path to the pretrained weights/config/vocabulary: 7 | 8 | Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create a class of the relevant architecture (ex: ``model = AutoModel.from_pretrained('bert-base-cased')`` will create a instance of ``BertModel``). 9 | 10 | 11 | ``AutoConfig`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.AutoConfig 15 | :members: 16 | 17 | 18 | ``AutoTokenizer`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.AutoTokenizer 22 | :members: 23 | 24 | 25 | ``AutoModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.AutoModel 29 | :members: 30 | 31 | 32 | ``AutoModelForPreTraining`` 33 | ~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.AutoModelForPreTraining 36 | :members: 37 | 38 | 39 | ``AutoModelWithLMHead`` 40 | ~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.AutoModelWithLMHead 43 | :members: 44 | 45 | 46 | ``AutoModelForSequenceClassification`` 47 | ~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.AutoModelForSequenceClassification 50 | :members: 51 | 52 | 53 | ``AutoModelForQuestionAnswering`` 54 | ~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.AutoModelForQuestionAnswering 57 | :members: 58 | 59 | 60 | ``AutoModelForTokenClassification`` 61 | ~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autoclass:: transformers.AutoModelForTokenClassification 64 | :members: 65 | 66 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_large/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_large 7 | 8 | This a albert_chinese_large model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_large model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_large 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_large' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.9422469735145569` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_xlarge/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_xlarge 7 | 8 | This a albert_chinese_xlarge model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_xlarge model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_xlarge 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_xlarge' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.9942440390586853` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_xxlarge/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_xxlarge 7 | 8 | This a albert_chinese_xxlarge model from [Google's github](https://github.com/google-research/ALBERT) 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_xxlarge model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_xxlarge 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_xxlarge' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `心 0.995713472366333` 45 | -------------------------------------------------------------------------------- /examples/translation/t5/test_t5_examples.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | 8 | from .evaluate_wmt import run_generate 9 | 10 | 11 | text = ["When Liana Barrientos was 23 years old, she got married in Westchester County."] 12 | translation = ["Als Liana Barrientos 23 Jahre alt war, heiratete sie in Westchester County."] 13 | 14 | output_file_name = "output_t5_trans.txt" 15 | score_file_name = "score_t5_trans.txt" 16 | 17 | logging.basicConfig(level=logging.DEBUG) 18 | 19 | logger = logging.getLogger() 20 | 21 | 22 | class TestT5Examples(unittest.TestCase): 23 | def test_t5_cli(self): 24 | stream_handler = logging.StreamHandler(sys.stdout) 25 | logger.addHandler(stream_handler) 26 | 27 | tmp_source = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.hypo" 28 | with tmp_source.open("w") as f: 29 | f.write("\n".join(text)) 30 | 31 | tmp_target = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.target" 32 | with tmp_target.open("w") as f: 33 | f.write("\n".join(translation)) 34 | 35 | output_file_name = Path(tempfile.gettempdir()) / "utest_output_trans.hypo" 36 | score_file_name = Path(tempfile.gettempdir()) / "utest_score.hypo" 37 | 38 | testargs = [ 39 | "evaluate_wmt.py", 40 | "patrickvonplaten/t5-tiny-random", 41 | str(tmp_source), 42 | str(output_file_name), 43 | str(tmp_target), 44 | str(score_file_name), 45 | ] 46 | 47 | with patch.object(sys, "argv", testargs): 48 | run_generate() 49 | self.assertTrue(Path(output_file_name).exists()) 50 | self.assertTrue(Path(score_file_name).exists()) 51 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_tiny/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_tiny 7 | 8 | This a albert_chinese_tiny model from [brightmart/albert_zh project](https://github.com/brightmart/albert_zh), albert_tiny_google_zh model 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 
| Since sentencepiece is not used in albert_chinese_tiny model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_tiny 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_tiny' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.40312355756759644` 45 | -------------------------------------------------------------------------------- /model_cards/voidful/albert_chinese_small/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - chinese 4 | --- 5 | 6 | # albert_chinese_small 7 | 8 | This a albert_chinese_small model from [brightmart/albert_zh project](https://github.com/brightmart/albert_zh), albert_small_google_zh model 9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py) 10 | 11 | ## Attention (注意) 12 | 13 | Since sentencepiece is not used in albert_chinese_small model 14 | you have to call BertTokenizer instead of AlbertTokenizer !!! 15 | we can eval it using an example on MaskedLM 16 | 17 | 由於 albert_chinese_small 模型沒有用 sentencepiece 18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!! 
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確 20 | 21 | ## Justify (驗證有效性) 22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj) 23 | ```python 24 | from transformers import * 25 | import torch 26 | from torch.nn.functional import softmax 27 | 28 | pretrained = 'voidful/albert_chinese_small' 29 | tokenizer = BertTokenizer.from_pretrained(pretrained) 30 | model = AlbertForMaskedLM.from_pretrained(pretrained) 31 | 32 | inputtext = "今天[MASK]情很好" 33 | 34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103) 35 | 36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1 37 | outputs = model(input_ids, masked_lm_labels=input_ids) 38 | loss, prediction_scores = outputs[:2] 39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist() 40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item() 41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 42 | print(predicted_token,logit_prob[predicted_index]) 43 | ``` 44 | Result: `感 0.6390823125839233` 45 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-xlm-roberta-base/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow XLM-RoBERTa 2 | 3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow. 4 | 5 | ## XLM-RoBERTa 6 | 7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross lingual benchmarks. 8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5) 14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5) 15 | 16 | ## Usage 17 | 18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like: 19 | 20 | ```python 21 | from transformers import TFXLMRobertaModel 22 | 23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base") 24 | ``` 25 | Or 26 | ``` 27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large") 28 | ``` 29 | 30 | ## Huggingface model hub 31 | 32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 33 | 34 | ## Acknowledgments 35 | 36 | Thanks to all the Huggingface team for the support and their amazing library! 37 | -------------------------------------------------------------------------------- /model_cards/jplu/tf-xlm-roberta-large/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow XLM-RoBERTa 2 | 3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow. 
4 | 5 | ## XLM-RoBERTa 6 | 7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross lingual benchmarks. 8 | 9 | ## Model Weights 10 | 11 | | Model | Downloads 12 | | -------------------------------- | --------------------------------------------------------------------------------------------------------------- 13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5) 14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5) 15 | 16 | ## Usage 17 | 18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like: 19 | 20 | ```python 21 | from transformers import TFXLMRobertaModel 22 | 23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base") 24 | ``` 25 | Or 26 | ``` 27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large") 28 | ``` 29 | 30 | ## Huggingface model hub 31 | 32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu). 33 | 34 | ## Acknowledgments 35 | 36 | Thanks to all the Huggingface team for the support and their amazing library! 37 | -------------------------------------------------------------------------------- /model_cards/allenai/biomed_roberta_base/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | thumbnail: https://huggingface.co/front/thumbnails/allenai.png 3 | --- 4 | 5 | # BioMed-RoBERTa-base 6 | 7 | BioMed-RoBERTa-base is a language model based on the RoBERTa-base (Liu et. al, 2019) architecture. We adapt RoBERTa-base to 2.68 million scientific papers from the [Semantic Scholar](https://www.semanticscholar.org) corpus via continued pretraining. This amounts to 7.55B tokens and 47GB of data. We use the full text of the papers in training, not just abstracts. 8 | 9 | Specific details of the adaptive pretraining procedure can be found in Gururangan et. al, 2020. 10 | 11 | 12 | ## Evaluation 13 | 14 | BioMed-RoBERTa achieves competitive performance to state of the art models on a number of NLP tasks in the biomedical domain (numbers are mean (standard deviation) over 3+ random seeds) 15 | 16 | 17 | | Task | Task Type | RoBERTa-base | BioMed-RoBERTa-base | 18 | |--------------|---------------------|--------------|---------------------| 19 | | RCT-180K | Text Classification | 86.4 (0.3) | 86.9 (0.2) | 20 | | ChemProt | Relation Extraction | 81.1 (1.1) | 83.0 (0.7) | 21 | | JNLPBA | NER | 74.3 (0.2) | 75.2 (0.1) | 22 | | BC5CDR | NER | 85.6 (0.1) | 87.8 (0.1) | 23 | | NCBI-Disease | NER | 86.6 (0.3) | 87.1 (0.8) | 24 | 25 | More evaluations TBD. 26 | 27 | ## Citation 28 | 29 | If using this model, please cite the following paper: 30 | 31 | ```bibtex 32 | @inproceedings{domains, 33 | author = {Suchin Gururangan and Ana Marasović and Swabha Swayamdipta and Kyle Lo and Iz Beltagy and Doug Downey and Noah A. 
Smith}, 34 | title = {Don't Stop Pretraining: Adapt Language Models to Domains and Tasks}, 35 | year = {2020}, 36 | booktitle = {Proceedings of ACL}, 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /model_cards/google/electra-large-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-large-generator", 24 | tokenizer="google/electra-large-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /model_cards/google/electra-small-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 
13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-small-generator", 24 | tokenizer="google/electra-small-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /model_cards/google/electra-base-generator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: english 3 | thumbnail: https://huggingface.co/front/thumbnails/google.png 4 | 5 | license: apache-2.0 6 | --- 7 | 8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators 9 | 10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 11 | 12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB). 13 | 14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g,. [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)). 15 | 16 | ## How to use the generator in `transformers` 17 | 18 | ```python 19 | from transformers import pipeline 20 | 21 | fill_mask = pipeline( 22 | "fill-mask", 23 | model="google/electra-base-generator", 24 | tokenizer="google/electra-base-generator" 25 | ) 26 | 27 | print( 28 | fill_mask(f"HuggingFace is creating a {fill_mask.tokenizer.mask_token} that the community uses to solve NLP tasks.") 29 | ) 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /examples/token-classification/run_pl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install newest ptl. 
4 | pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/ 5 | # for seqeval metrics import 6 | pip install -r ../requirements.txt 7 | 8 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ 9 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp 10 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ 11 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp 12 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ 13 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp 14 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py" 15 | export MAX_LENGTH=128 16 | export BERT_MODEL=bert-base-multilingual-cased 17 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt 18 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt 19 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt 20 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt 21 | export BATCH_SIZE=32 22 | export NUM_EPOCHS=3 23 | export SEED=1 24 | 25 | export OUTPUT_DIR_NAME=germeval-model 26 | export CURRENT_DIR=${PWD} 27 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} 28 | mkdir -p $OUTPUT_DIR 29 | 30 | # Add parent directory to python path to access lightning_base.py 31 | export PYTHONPATH="../":"${PYTHONPATH}" 32 | 33 | python3 run_pl_ner.py --data_dir ./ \ 34 | --model_type bert \ 35 | --labels ./labels.txt \ 36 | --model_name_or_path $BERT_MODEL \ 37 | --output_dir $OUTPUT_DIR \ 38 | --max_seq_length $MAX_LENGTH \ 39 | --num_train_epochs $NUM_EPOCHS \ 40 | --train_batch_size $BATCH_SIZE \ 41 | --seed $SEED \ 42 | --do_train \ 43 | --do_predict -------------------------------------------------------------------------------- /model_cards/gaochangkuan/model_dir/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Chinese poetry by topic. 
2 | 3 | ```python 4 | from transformers import * 5 | 6 | tokenizer = BertTokenizer.from_pretrained("gaochangkuan/model_dir") 7 | 8 | model = AutoModelWithLMHead.from_pretrained("gaochangkuan/model_dir") 9 | 10 | 11 | prompt= '''田园躬耕''' 12 | 13 | length= 84 14 | stop_token='' 15 | 16 | temperature = 1.2 17 | 18 | repetition_penalty=1.3 19 | 20 | k= 30 21 | p= 0.95 22 | 23 | device ='cuda' 24 | seed=2020 25 | no_cuda=False 26 | 27 | prompt_text = prompt if prompt else input("Model prompt >>> ") 28 | 29 | encoded_prompt = tokenizer.encode( 30 | ''+prompt_text+'', 31 | add_special_tokens=False, 32 | return_tensors="pt" 33 | ) 34 | 35 | encoded_prompt = encoded_prompt.to(device) 36 | 37 | output_sequences = model.generate( 38 | input_ids=encoded_prompt, 39 | max_length=length, 40 | min_length=10, 41 | do_sample=True, 42 | early_stopping=True, 43 | num_beams=10, 44 | temperature=temperature, 45 | top_k=k, 46 | top_p=p, 47 | repetition_penalty=repetition_penalty, 48 | bad_words_ids=None, 49 | bos_token_id=tokenizer.bos_token_id, 50 | pad_token_id=tokenizer.pad_token_id, 51 | eos_token_id=tokenizer.eos_token_id, 52 | length_penalty=1.2, 53 | no_repeat_ngram_size=2, 54 | num_return_sequences=1, 55 | attention_mask=None, 56 | decoder_start_token_id=tokenizer.bos_token_id,) 57 | 58 | 59 | generated_sequence = output_sequences[0].tolist() 60 | text = tokenizer.decode(generated_sequence) 61 | 62 | 63 | text = text[: text.find(stop_token) if stop_token else None] 64 | 65 | print(''.join(text).replace(' ','').replace('','').replace('','')) 66 | ``` 67 | -------------------------------------------------------------------------------- /tests/test_tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from transformers.tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast 18 | 19 | from .test_tokenization_bert import BertTokenizationTest 20 | from .utils import slow 21 | 22 | 23 | class DistilBertTokenizationTest(BertTokenizationTest): 24 | 25 | tokenizer_class = DistilBertTokenizer 26 | 27 | def get_rust_tokenizer(self, **kwargs): 28 | return DistilBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs) 29 | 30 | @slow 31 | def test_sequence_builders(self): 32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 33 | 34 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 36 | 37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 39 | 40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 42 | tokenizer.sep_token_id 43 | ] 44 | -------------------------------------------------------------------------------- /model_cards/allegro/herbert-klej-cased-tokenizer-v1/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: polish 3 | --- 4 | 5 | # HerBERT tokenizer 6 | 7 | The **[HerBERT](https://en.wikipedia.org/wiki/Zbigniew_Herbert)** tokenizer is a character-level byte-pair encoding tokenizer with a 8 | vocabulary size of 50k tokens. It was trained on [Wolne Lektury](https://wolnelektury.pl/) and a publicly available subset of the 9 | [National Corpus of Polish](http://nkjp.pl/index.php?page=14&lang=0) with the [fastBPE](https://github.com/glample/fastBPE) library. 10 | The tokenizer uses the `XLMTokenizer` implementation from [transformers](https://github.com/huggingface/transformers). 11 | 12 | ## Tokenizer usage 13 | The HerBERT tokenizer should be used together with the [HerBERT model](https://huggingface.co/allegro/herbert-klej-cased-v1): 14 | ```python 15 | from transformers import XLMTokenizer, RobertaModel 16 | 17 | tokenizer = XLMTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1") 18 | model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1") 19 | 20 | encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt') 21 | outputs = model(encoded_input) 22 | ``` 23 | 24 | ## License 25 | CC BY-SA 4.0 26 | 27 | ## Citation 28 | If you use this tokenizer, please cite the following paper: 29 | ``` 30 | @misc{rybak2020klej, 31 | title={KLEJ: Comprehensive Benchmark for Polish Language Understanding}, 32 | author={Piotr Rybak and Robert Mroczkowski and Janusz Tracz and Ireneusz Gawlik}, 33 | year={2020}, 34 | eprint={2005.00630}, 35 | archivePrefix={arXiv}, 36 | primaryClass={cs.CL} 37 | } 38 | ``` 39 | The paper has been accepted at ACL 2020; we will update the BibTeX entry as soon as the proceedings appear. 40 | 41 | ## Authors 42 | The tokenizer was created by the **Allegro Machine Learning Research** team.
43 | 44 | You can contact us at: klejbenchmark@allegro.pl 45 | -------------------------------------------------------------------------------- /model_cards/nlptown/bert-base-multilingual-uncased-sentiment/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - english 4 | - dutch 5 | - german 6 | - french 7 | - italian 8 | - spanish 9 | --- 10 | 11 | # bert-base-multilingual-uncased-sentiment 12 | 13 | This is a bert-base-multilingual-uncased model finetuned for sentiment analysis on product reviews in six languages: English, Dutch, German, French, Spanish and Italian. It predicts the sentiment of a review as a number of stars (between 1 and 5). 14 | 15 | This model is intended for direct use as a sentiment analysis model for product reviews in any of the six languages above, or for further finetuning on related sentiment analysis tasks. 16 | 17 | ## Training data 18 | 19 | Here is the number of product reviews we used for finetuning the model: 20 | 21 | | Language | Number of reviews | 22 | | -------- | ----------------- | 23 | | English | 150k | 24 | | Dutch | 80k | 25 | | German | 137k | 26 | | French | 140k | 27 | | Italian | 72k | 28 | | Spanish | 50k | 29 | 30 | ## Accuracy 31 | 32 | The finetuned model obtained the following accuracy on 5,000 held-out product reviews in each of the languages: 33 | 34 | - Accuracy (exact) is the exact match on the number of stars. 35 | - Accuracy (off-by-1) is the percentage of reviews where the number of stars the model predicts differs by a maximum of 1 from the number given by the human reviewer. 36 | 37 | 38 | | Language | Accuracy (exact) | Accuracy (off-by-1) | 39 | | -------- | ---------------------- | ------------------- | 40 | | English | 67% | 95% | 41 | | Dutch | 57% | 93% | 42 | | German | 61% | 94% | 43 | | French | 59% | 94% | 44 | | Italian | 59% | 95% | 45 | | Spanish | 58% | 95% | 46 | 47 | ## Contact 48 | 49 | Contact [NLP Town](https://www.nlp.town) for questions, feedback and/or requests for similar models. 50 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-base-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Base Discriminator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-discriminator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-discriminator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
[SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForPreTraining 34 | 35 | ```python 36 | import torch 37 | from transformers import ElectraForPreTraining, ElectraTokenizer 38 | 39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-base-discriminator") 40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator") 41 | 42 | sentence = "나는 방금 밥을 먹었다." 43 | fake_sentence = "나는 내일 밥을 먹었다." 44 | 45 | fake_tokens = tokenizer.tokenize(fake_sentence) 46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt") 47 | 48 | discriminator_outputs = discriminator(fake_inputs) 49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2) 50 | 51 | print(list(zip(fake_tokens, predictions.tolist()[1:-1]))) 52 | ``` 53 | -------------------------------------------------------------------------------- /model_cards/monologg/koelectra-small-discriminator/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: Korean 3 | --- 4 | 5 | # KoELECTRA (Small Discriminator) 6 | 7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-discriminator`) 8 | 9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md). 10 | 11 | ## Usage 12 | 13 | ### Load model and tokenizer 14 | 15 | ```python 16 | >>> from transformers import ElectraModel, ElectraTokenizer 17 | 18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-discriminator") 19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 20 | ``` 21 | 22 | ### Tokenizer example 23 | 24 | ```python 25 | >>> from transformers import ElectraTokenizer 26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]") 28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'] 29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']) 30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3] 31 | ``` 32 | 33 | ## Example using ElectraForPreTraining 34 | 35 | ```python 36 | import torch 37 | from transformers import ElectraForPreTraining, ElectraTokenizer 38 | 39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-small-discriminator") 40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator") 41 | 42 | sentence = "나는 방금 밥을 먹었다." 43 | fake_sentence = "나는 내일 밥을 먹었다." 44 | 45 | fake_tokens = tokenizer.tokenize(fake_sentence) 46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt") 47 | 48 | discriminator_outputs = discriminator(fake_inputs) 49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2) 50 | 51 | print(list(zip(fake_tokens, predictions.tolist()[1:-1]))) 52 | ``` 53 | --------------------------------------------------------------------------------