├── tests
├── __init__.py
├── fixtures
│ ├── empty.txt
│ ├── dummy-config.json
│ ├── input.txt
│ ├── tests_samples
│ │ ├── .gitignore
│ │ ├── GermEval
│ │ │ └── labels.txt
│ │ ├── STS-B
│ │ │ ├── train.tsv
│ │ │ └── dev.tsv
│ │ └── MRPC
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ ├── spiece.model
│ ├── test_sentencepiece.model
│ └── hub-index.sample.json
├── test_adapter_saving.py
├── test_activations.py
├── test_adapter_fusion_saving.py
├── test_adapter_fusion_config.py
├── test_adapter_config.py
├── test_tokenization_utils.py
└── test_tokenization_distilbert.py
├── MANIFEST.in
├── examples
├── summarization
│ ├── __init__.py
│ ├── t5
│ │ ├── __init__.py
│ │ ├── download_cnn_daily_mail.py
│ │ ├── README.md
│ │ └── test_t5_examples.py
│ ├── bart
│ │ ├── __init__.py
│ │ ├── run_train.sh
│ │ └── run_train_tiny.sh
│ └── bertabs
│ │ ├── __init__.py
│ │ └── requirements.txt
├── translation
│ └── t5
│ │ ├── __init__.py
│ │ └── test_t5_examples.py
├── ner
│ └── .gitignore
├── distillation
│ ├── requirements.txt
│ └── training_configs
│ │ ├── distilgpt2.json
│ │ ├── distilbert-base-cased.json
│ │ ├── distilbert-base-uncased.json
│ │ ├── distilbert-base-multilingual-cased.json
│ │ └── distilroberta-base.json
├── text-generation
│ ├── pplm
│ │ ├── imgs
│ │ │ ├── wooly.png
│ │ │ └── headfigure.png
│ │ └── pplm_classification_head.py
│ └── README.md
├── movement-pruning
│ ├── emmental
│ │ ├── modules
│ │ │ └── __init__.py
│ │ └── __init__.py
│ └── requirements.txt
├── requirements.txt
├── contrib
│ ├── README.md
│ └── mm-imdb
│ │ └── README.md
├── token-classification
│ ├── test_ner_examples.py
│ ├── run.sh
│ └── run_pl.sh
├── text-classification
│ └── run_pl.sh
├── benchmarking
│ └── run_benchmark.py
├── adversarial
│ └── README.md
└── multiple-choice
│ └── README.md
├── docs
├── source
│ ├── examples.md
│ ├── notebooks.md
│ ├── favicon.ico
│ ├── _static
│ │ └── css
│ │ │ ├── Calibre-Thin.otf
│ │ │ ├── Calibre-Light.ttf
│ │ │ ├── Calibre-Medium.otf
│ │ │ ├── Calibre-Regular.otf
│ │ │ └── code-snippets.css
│ ├── imgs
│ │ ├── transformers_logo_name.png
│ │ ├── warmup_cosine_schedule.png
│ │ ├── warmup_linear_schedule.png
│ │ ├── warmup_constant_schedule.png
│ │ ├── warmup_cosine_hard_restarts_schedule.png
│ │ └── warmup_cosine_warm_restarts_schedule.png
│ ├── main_classes
│ │ ├── configuration.rst
│ │ └── model.rst
│ ├── model_doc
│ │ ├── encoderdecoder.rst
│ │ └── auto.rst
│ └── bertology.rst
└── Makefile
├── model_cards
├── bert-base-chinese-README.md
├── bert-large-cased-README.md
├── bart-large-cnn
│ └── README.md
├── bart-large-xsum
│ └── README.md
├── bert-base-german-dbmdz-cased-README.md
├── bert-base-german-dbmdz-uncased-README.md
├── google
│ ├── bert_uncased_L-10_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-10_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-10_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-12_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-12_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-12_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-2_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-2_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-2_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-2_H-768_A-12
│ │ └── README.md
│ ├── bert_uncased_L-4_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-4_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-4_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-4_H-768_A-12
│ │ └── README.md
│ ├── bert_uncased_L-6_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-6_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-6_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-6_H-768_A-12
│ │ └── README.md
│ ├── bert_uncased_L-8_H-128_A-2
│ │ └── README.md
│ ├── bert_uncased_L-8_H-256_A-4
│ │ └── README.md
│ ├── bert_uncased_L-8_H-512_A-8
│ │ └── README.md
│ ├── bert_uncased_L-8_H-768_A-12
│ │ └── README.md
│ ├── bert_uncased_L-10_H-768_A-12
│ │ └── README.md
│ ├── bert_uncased_L-12_H-768_A-12
│ │ └── README.md
│ ├── reformer-crime-and-punishment
│ │ └── README.md
│ ├── electra-large-generator
│ │ └── README.md
│ ├── electra-small-generator
│ │ └── README.md
│ └── electra-base-generator
│ │ └── README.md
├── facebook
│ └── bart-large-cnn
│ │ └── README.md
├── bert-base-multilingual-cased-README.md
├── distilbert-base-multilingual-cased-README.md
├── bert-base-multilingual-uncased-README.md
├── t5-11b-README.md
├── t5-3b-README.md
├── t5-base-README.md
├── t5-large-README.md
├── t5-small-README.md
├── severinsimmler
│ └── literary-german-bert
│ │ ├── kfold.png
│ │ └── prosa-jahre.png
├── deepset
│ ├── sentence_bert
│ │ └── README.md
│ └── bert-base-german-cased-oldvocab
│ │ └── README.md
├── djstrong
│ └── bg_cs_pl_ru_cased_L-12_H-768_A-12
│ │ └── README.md
├── gpt2-README.md
├── distilgpt2-README.md
├── roberta-base-README.md
├── xlm-roberta-base-README.md
├── albert-base-v1-README.md
├── albert-xxlarge-v2-README.md
├── bert-base-cased-README.md
├── bert-base-uncased-README.md
├── distilroberta-base-README.md
├── xlm-mlm-en-2048-README.md
├── distilbert-base-uncased-README.md
├── binwang
│ └── xlnet-base-cased
│ │ └── README.md
├── daigo
│ └── bert-base-japanese-sentiment
│ │ └── README.md
├── lvwerra
│ ├── gpt2-medium-taboo
│ │ └── README.md
│ ├── bert-imdb
│ │ └── README.md
│ ├── gpt2-imdb
│ │ └── README.md
│ ├── gpt2-imdb-pos
│ │ └── README.md
│ └── gpt2-imdb-ctrl
│ │ └── README.md
├── lysandre
│ ├── arxiv
│ │ └── README.md
│ └── arxiv-nlp
│ │ └── README.md
├── Hate-speech-CNERG
│ ├── dehatebert-mono-arabic
│ │ └── README.md
│ └── dehatebert-mono-english
│ │ └── README.md
├── jannesg
│ └── bertsson
│ │ └── README.md
├── DeepPavlov
│ ├── rubert-base-cased
│ │ └── README.md
│ ├── bert-base-bg-cs-pl-ru-cased
│ │ └── README.md
│ ├── rubert-base-cased-conversational
│ │ └── README.md
│ ├── rubert-base-cased-sentence
│ │ └── README.md
│ ├── bert-base-multilingual-cased-sentence
│ │ └── README.md
│ └── bert-base-cased-conversational
│ │ └── README.md
├── julien-c
│ ├── bert-xsmall-dummy
│ │ └── README.md
│ ├── EsperBERTo-small-pos
│ │ └── README.md
│ ├── dummy-unknown
│ │ └── README.md
│ └── EsperBERTo-small
│ │ └── README.md
├── spentaur
│ └── yelp
│ │ └── README.md
├── allenai
│ ├── longformer-base-4096-extra.pos.embd.only
│ │ └── README.md
│ ├── scibert_scivocab_cased
│ │ └── README.md
│ ├── scibert_scivocab_uncased
│ │ └── README.md
│ ├── longformer-base-4096
│ │ └── README.md
│ └── biomed_roberta_base
│ │ └── README.md
├── codegram
│ └── calbert-base-uncased
│ │ └── README.md
├── clue
│ ├── xlnet_chinese_large
│ │ └── README.md
│ ├── roberta_chinese_base
│ │ └── README.md
│ ├── roberta_chinese_large
│ │ └── README.md
│ ├── albert_chinese_tiny
│ │ └── README.md
│ └── albert_chinese_small
│ │ └── README.md
├── ViktorAlm
│ └── electra-base-norwegian-uncased-discriminator
│ │ └── README.md
├── canwenxu
│ └── BERT-of-Theseus-MNLI
│ │ └── README.md
├── surajp
│ └── albert-base-sanskrit
│ │ └── README.md
├── wptoux
│ └── albert-chinese-large-qa
│ │ └── README.md
├── illuin
│ ├── camembert-base-fquad
│ │ └── README.md
│ └── camembert-large-fquad
│ │ └── README.md
├── jplu
│ ├── tf-camembert-base
│ │ └── README.md
│ ├── tf-xlm-roberta-base
│ │ └── README.md
│ └── tf-xlm-roberta-large
│ │ └── README.md
├── twmkn9
│ ├── albert-base-v2-squad2
│ │ └── README.md
│ ├── bert-base-uncased-squad2
│ │ └── README.md
│ ├── distilroberta-base-squad2
│ │ └── README.md
│ └── distilbert-base-uncased-squad2
│ │ └── README.md
├── digitalepidemiologylab
│ └── covid-twitter-bert
│ │ └── README.md
├── fmikaelian
│ ├── camembert-base-fquad
│ │ └── README.md
│ ├── camembert-base-squad
│ │ └── README.md
│ └── flaubert-base-uncased-squad
│ │ └── README.md
├── activebus
│ ├── BERT-DK_rest
│ │ └── README.md
│ ├── BERT-PT_rest
│ │ └── README.md
│ ├── BERT-PT_laptop
│ │ └── README.md
│ └── BERT-DK_laptop
│ │ └── README.md
├── monologg
│ ├── koelectra-base-generator
│ │ └── README.md
│ ├── koelectra-small-generator
│ │ └── README.md
│ ├── koelectra-base-discriminator
│ │ └── README.md
│ └── koelectra-small-discriminator
│ │ └── README.md
├── ixa-ehu
│ └── berteus-base-cased
│ │ └── README.md
├── ahotrod
│ └── roberta_large_squad2
│ │ └── README.md
├── valhalla
│ └── t5-base-squad
│ │ └── README.md
├── Tereveni-AI
│ └── gpt2-124M-uk-fiction
│ │ └── README.md
├── voidful
│ ├── albert_chinese_base
│ │ └── README.md
│ ├── albert_chinese_large
│ │ └── README.md
│ ├── albert_chinese_xlarge
│ │ └── README.md
│ ├── albert_chinese_xxlarge
│ │ └── README.md
│ ├── albert_chinese_tiny
│ │ └── README.md
│ └── albert_chinese_small
│ │ └── README.md
├── gaochangkuan
│ └── model_dir
│ │ └── README.md
├── allegro
│ └── herbert-klej-cased-tokenizer-v1
│ │ └── README.md
└── nlptown
│ └── bert-base-multilingual-uncased-sentiment
│ └── README.md
├── adapter_docs
├── logo.png
├── favicon.png
├── adapter_types.md
├── classes
│ ├── adapter_modules.rst
│ ├── adapter_config.rst
│ ├── bert_mixins.rst
│ ├── adapter_utils.rst
│ ├── model_mixins.rst
│ ├── weights_loaders.rst
│ └── roberta.rst
├── _static
│ └── custom.css
├── Makefile
├── README.md
├── installation.md
└── make.bat
├── .coveragerc
├── src
└── transformers
│ ├── data
│ ├── datasets
│ │ └── __init__.py
│ ├── processors
│ │ └── __init__.py
│ └── __init__.py
│ ├── commands
│ ├── __init__.py
│ ├── transformers_cli.py
│ └── download.py
│ ├── benchmark
│ └── __init__.py
│ ├── trainer_utils.py
│ ├── configuration_marian.py
│ ├── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│ ├── configuration_camembert.py
│ ├── configuration_mmbt.py
│ └── activations.py
├── templates
└── adding_a_new_example_script
│ └── README.md
├── .github
├── ISSUE_TEMPLATE
│ ├── new-adapter-setup.md
│ ├── feature-request.md
│ └── bug-report.md
└── workflows
│ ├── adapter_docs_build.yml
│ └── tests_torch.yml
├── deploy_multi_version_doc.sh
├── docker
├── transformers-pytorch-cpu
│ └── Dockerfile
├── transformers-tensorflow-cpu
│ └── Dockerfile
├── transformers-cpu
│ └── Dockerfile
├── transformers-pytorch-gpu
│ └── Dockerfile
├── transformers-tensorflow-gpu
│ └── Dockerfile
└── transformers-gpu
│ └── Dockerfile
├── setup.cfg
├── .circleci
└── deploy.sh
└── Makefile
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fixtures/empty.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 |
--------------------------------------------------------------------------------
/examples/summarization/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/summarization/t5/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/translation/t5/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/summarization/bart/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/summarization/bertabs/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/examples.md:
--------------------------------------------------------------------------------
1 | ../../examples/README.md
--------------------------------------------------------------------------------
/docs/source/notebooks.md:
--------------------------------------------------------------------------------
1 | ../../notebooks/README.md
--------------------------------------------------------------------------------
/tests/fixtures/dummy-config.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_type": "roberta"
3 | }
--------------------------------------------------------------------------------
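A fixture like this can be read back through the library's config classes; a minimal sketch, assuming the fixture sits at the path shown above:

```python
from transformers import RobertaConfig

# Load the fixture; from_json_file reads a plain JSON dict into a config object.
config = RobertaConfig.from_json_file("tests/fixtures/dummy-config.json")
print(config.model_type)  # "roberta"
```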
/model_cards/bert-base-chinese-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
--------------------------------------------------------------------------------
/model_cards/bert-large-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: apache-2.0
3 | ---
4 |
--------------------------------------------------------------------------------
/examples/ner/.gitignore:
--------------------------------------------------------------------------------
1 | *.tmp
2 | cached_*
3 | *.txt
4 | preprocess.*
5 | *.ps1
6 |
--------------------------------------------------------------------------------
/tests/fixtures/input.txt:
--------------------------------------------------------------------------------
1 | Who was Jim Henson ? ||| Jim Henson was a puppeteer
2 |
--------------------------------------------------------------------------------
/model_cards/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/model_cards/bart-large-xsum/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/examples/summarization/bertabs/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 |
3 | # For ROUGE
4 | nltk
5 | py-rouge
6 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: german
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-base-german-dbmdz-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: german
3 | license: mit
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-10_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-10_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-10_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-12_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-12_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-12_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-2_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-2_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-2_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-2_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-4_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-4_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-4_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-4_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-6_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-6_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-6_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-6_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-8_H-128_A-2/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-8_H-256_A-4/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-8_H-512_A-8/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-8_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/facebook/bart-large-cnn/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 |
5 | license: mit
6 | ---
7 |
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-10_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/model_cards/google/bert_uncased_L-12_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | ../../iuliaturc/bert_uncased_L-2_H-128_A-2/README.md
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/.gitignore:
--------------------------------------------------------------------------------
1 | *.*
2 | cache*
3 | temp*
4 | !*.txt
5 | !*.tsv
6 | !*.json
7 | !.gitignore
--------------------------------------------------------------------------------
/adapter_docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/logo.png
--------------------------------------------------------------------------------
/docs/source/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/favicon.ico
--------------------------------------------------------------------------------
/adapter_docs/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/adapter_docs/favicon.png
--------------------------------------------------------------------------------
/model_cards/bert-base-multilingual-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: multilingual
3 |
4 | license: apache-2.0
5 | ---
6 |
--------------------------------------------------------------------------------
/model_cards/distilbert-base-multilingual-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: multilingual
3 | license: apache-2.0
4 | ---
5 |
--------------------------------------------------------------------------------
/model_cards/bert-base-multilingual-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: multilingual
3 |
4 | license: apache-2.0
5 | ---
6 |
--------------------------------------------------------------------------------
/model_cards/t5-11b-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | - translation
5 |
6 | license: apache-2.0
7 | ---
8 |
9 |
--------------------------------------------------------------------------------
/model_cards/t5-3b-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | - translation
5 |
6 | license: apache-2.0
7 | ---
8 |
9 |
--------------------------------------------------------------------------------
/model_cards/t5-base-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | - translation
5 |
6 | license: apache-2.0
7 | ---
8 |
9 |
--------------------------------------------------------------------------------
/model_cards/t5-large-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | - translation
5 |
6 | license: apache-2.0
7 | ---
8 |
9 |
--------------------------------------------------------------------------------
/model_cards/t5-small-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - summarization
4 | - translation
5 |
6 | license: apache-2.0
7 | ---
8 |
9 |
--------------------------------------------------------------------------------
/tests/fixtures/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/spiece.model
--------------------------------------------------------------------------------
/adapter_docs/adapter_types.md:
--------------------------------------------------------------------------------
1 | # Adapter Types
2 |
3 | TODO write something about different adapter types and configurations.
4 |
--------------------------------------------------------------------------------
/docs/source/_static/css/Calibre-Thin.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Thin.otf
--------------------------------------------------------------------------------
/tests/fixtures/test_sentencepiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/tests/fixtures/test_sentencepiece.model
--------------------------------------------------------------------------------
/docs/source/_static/css/Calibre-Light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Light.ttf
--------------------------------------------------------------------------------
/docs/source/_static/css/Calibre-Medium.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Medium.otf
--------------------------------------------------------------------------------
/docs/source/_static/css/Calibre-Regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/_static/css/Calibre-Regular.otf
--------------------------------------------------------------------------------
/docs/source/imgs/transformers_logo_name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/transformers_logo_name.png
--------------------------------------------------------------------------------
/docs/source/imgs/warmup_cosine_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_schedule.png
--------------------------------------------------------------------------------
/docs/source/imgs/warmup_linear_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_linear_schedule.png
--------------------------------------------------------------------------------
/docs/source/imgs/warmup_constant_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_constant_schedule.png
--------------------------------------------------------------------------------
/examples/distillation/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 |
3 | gitpython==3.0.2
4 | tensorboard>=1.14.0
5 | tensorboardX==1.8
6 | psutil==5.6.6
7 | scipy==1.3.1
8 |
--------------------------------------------------------------------------------
/examples/text-generation/pplm/imgs/wooly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/wooly.png
--------------------------------------------------------------------------------
/examples/text-generation/pplm/imgs/headfigure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/examples/text-generation/pplm/imgs/headfigure.png
--------------------------------------------------------------------------------
/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png
--------------------------------------------------------------------------------
/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png
--------------------------------------------------------------------------------
/model_cards/severinsimmler/literary-german-bert/kfold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/kfold.png
--------------------------------------------------------------------------------
/examples/movement-pruning/emmental/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer
3 | from .masked_nn import MaskedLinear
4 |
--------------------------------------------------------------------------------
/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ashishpatel26/adapter-transformers/master/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png
--------------------------------------------------------------------------------
/model_cards/deepset/sentence_bert/README.md:
--------------------------------------------------------------------------------
1 | This is an upload of the bert-base-nli-stsb-mean-tokens pretrained model from the Sentence Transformers Repo (https://github.com/UKPLab/sentence-transformers)
2 |
--------------------------------------------------------------------------------
/adapter_docs/classes/adapter_modules.rst:
--------------------------------------------------------------------------------
1 | Adapter Modules
2 | ===============
3 |
4 | Classes implementing task and language adapters.
5 |
6 | .. automodule:: transformers.adapter_modeling
7 | :members:
8 |
--------------------------------------------------------------------------------
/model_cards/djstrong/bg_cs_pl_ru_cased_L-12_H-768_A-12/README.md:
--------------------------------------------------------------------------------
1 | Slavic BERT from https://github.com/deepmipt/Slavic-BERT-NER http://files.deeppavlov.ai/deeppavlov_data/bg_cs_pl_ru_cased_L-12_H-768_A-12.tar.gz
2 |
--------------------------------------------------------------------------------
/examples/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorboard
2 | scikit-learn
3 | seqeval
4 | psutil
5 | sacrebleu
6 | rouge-score
7 | tensorflow_datasets
8 | pytorch-lightning==0.7.3 # April 10, 2020 release
9 | matplotlib
10 |
--------------------------------------------------------------------------------
/examples/movement-pruning/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.4.0
2 | -e git+https://github.com/huggingface/transformers.git@352d5472b0c1dec0f420d606d16747d851b4bda8#egg=transformers
3 | knockknock>=0.1.8.1
4 | h5py>=2.10.0
5 | numpy>=1.18.2
6 | scipy>=1.4.1
7 |
--------------------------------------------------------------------------------
/model_cards/gpt2-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: mit
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/adapter_docs/classes/adapter_config.rst:
--------------------------------------------------------------------------------
1 | Model Adapters Config
2 | =======================
3 |
4 | This class manages the setup and configuration of adapter modules in a pre-trained model.
5 |
6 | .. autoclass:: transformers.ModelAdaptersConfig
7 | :members:
8 |
--------------------------------------------------------------------------------
/adapter_docs/classes/bert_mixins.rst:
--------------------------------------------------------------------------------
1 | BERT Mixins
2 | ====================
3 |
4 | These mixin classes, added to the BERT module classes, provide adapter support for all BERT-based transformer models.
5 |
6 | .. automodule:: transformers.adapter_bert
7 | :members:
8 |
--------------------------------------------------------------------------------
/examples/distillation/training_configs/distilgpt2.json:
--------------------------------------------------------------------------------
1 | {
2 | "initializer_range": 0.02,
3 | "layer_norm_epsilon": 0.00001,
4 | "n_ctx": 1024,
5 | "n_embd": 768,
6 | "n_head": 12,
7 | "n_layer": 6,
8 | "n_positions": 1024,
9 | "vocab_size": 50257
10 | }
--------------------------------------------------------------------------------
/model_cards/distilgpt2-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/roberta-base-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: mit
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source=transformers
3 | omit =
4 | # skip conversion scripts from testing for now
5 | */convert_*
6 | */__main__.py
7 | [report]
8 | exclude_lines =
9 | pragma: no cover
10 | raise
11 | except
12 | register_parameter
--------------------------------------------------------------------------------
/adapter_docs/classes/adapter_utils.rst:
--------------------------------------------------------------------------------
1 | Adapter Utilities
2 | ====================
3 |
4 | A collection of utility methods mainly related to searching and loading adapter modules from
5 | Adapter-Hub.
6 |
7 | .. automodule:: transformers.adapter_utils
8 | :members:
9 |
--------------------------------------------------------------------------------
/model_cards/xlm-roberta-base-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: mit
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/albert-base-v1-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/albert-xxlarge-v2-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/model_cards/bert-base-cased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/bert-base-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/distilroberta-base-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/xlm-mlm-en-2048-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: cc-by-nc-4.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/distilbert-base-uncased-README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - exbert
4 |
5 | license: apache-2.0
6 | ---
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/model_cards/binwang/xlnet-base-cased/README.md:
--------------------------------------------------------------------------------
1 | This is a pre-trained **XLNet** model with 12 layers.
2 | 
3 | It accompanies the paper: SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models
4 |
5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding)
6 |
--------------------------------------------------------------------------------
/examples/contrib/README.md:
--------------------------------------------------------------------------------
1 | # Community contributed examples
2 |
3 | This folder contains examples which are not actively maintained (mostly contributed by the community).
4 |
5 | Using these examples together with a recent version of the library usually requires making small (and sometimes larger) adaptations to get the scripts working.
6 |
--------------------------------------------------------------------------------
/docs/source/_static/css/code-snippets.css:
--------------------------------------------------------------------------------
1 |
2 | .highlight .c1, .highlight .sd{
3 | color: #999
4 | }
5 |
6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc {
7 | color: #FB8D68;
8 | }
9 |
10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow {
11 | color: #6670FF;
12 | }
--------------------------------------------------------------------------------
/src/transformers/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
3 | # module, but to preserve other warnings. So, don't check this module at all.
4 |
5 | from .glue import GlueDataset, GlueDataTrainingArguments
6 | from .language_modeling import LineByLineTextDataset, TextDataset
7 |
--------------------------------------------------------------------------------
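These dataset helpers are typically paired with a tokenizer along the following lines; a rough sketch only, assuming this era's `GlueDataTrainingArguments`/`GlueDataset` signatures and a local `./glue_data/MRPC` download:

```python
from transformers import AutoTokenizer
from transformers.data.datasets import GlueDataset, GlueDataTrainingArguments

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Describe the GLUE task and where its TSV files live (the data_dir path is an assumption).
args = GlueDataTrainingArguments(task_name="mrpc", data_dir="./glue_data/MRPC", max_seq_length=128)

# Builds (and caches) tokenized features for the training split.
train_dataset = GlueDataset(args, tokenizer=tokenizer)
print(len(train_dataset), train_dataset[0].label)
```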
/model_cards/daigo/bert-base-japanese-sentiment/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - japanese
4 | ---
5 |
6 | binary classification
7 |
8 | # Usage
9 | ```python
10 | from transformers import pipeline
11 | 
12 | print(pipeline("sentiment-analysis", model="daigo/bert-base-japanese-sentiment", tokenizer="daigo/bert-base-japanese-sentiment")("私は幸福である。"))
13 | # [{'label': 'ポジティブ', 'score': 0.98430425}]
14 | ```
15 | 
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/GermEval/labels.txt:
--------------------------------------------------------------------------------
1 | B-LOC
2 | B-LOCderiv
3 | B-LOCpart
4 | B-ORG
5 | B-ORGderiv
6 | B-ORGpart
7 | B-OTH
8 | B-OTHderiv
9 | B-OTHpart
10 | B-PER
11 | B-PERderiv
12 | B-PERpart
13 | I-LOC
14 | I-LOCderiv
15 | I-LOCpart
16 | I-ORG
17 | I-ORGderiv
18 | I-ORGpart
19 | I-OTH
20 | I-OTHderiv
21 | I-OTHpart
22 | I-PER
23 | I-PERderiv
24 | I-PERpart
25 | O
26 |
--------------------------------------------------------------------------------
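A label file like this is usually read into label/id mappings for token classification; a small sketch, assuming the fixture path from the tree above:

```python
# Build label <-> id mappings from the GermEval fixture (path is an assumption).
with open("tests/fixtures/tests_samples/GermEval/labels.txt", encoding="utf-8") as f:
    labels = [line.strip() for line in f if line.strip()]

label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}
print(len(labels), label2id["B-LOC"], id2label[0])
```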
/examples/movement-pruning/emmental/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .configuration_bert_masked import MaskedBertConfig
3 | from .modeling_bert_masked import (
4 | MaskedBertForMultipleChoice,
5 | MaskedBertForQuestionAnswering,
6 | MaskedBertForSequenceClassification,
7 | MaskedBertForTokenClassification,
8 | MaskedBertModel,
9 | )
10 | from .modules import *
11 |
--------------------------------------------------------------------------------
/examples/distillation/training_configs/distilbert-base-cased.json:
--------------------------------------------------------------------------------
1 | {
2 | "activation": "gelu",
3 | "attention_dropout": 0.1,
4 | "dim": 768,
5 | "dropout": 0.1,
6 | "hidden_dim": 3072,
7 | "initializer_range": 0.02,
8 | "max_position_embeddings": 512,
9 | "n_heads": 12,
10 | "n_layers": 6,
11 | "sinusoidal_pos_embds": true,
12 | "tie_weights_": true,
13 | "vocab_size": 28996
14 | }
15 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-medium-taboo/README.md:
--------------------------------------------------------------------------------
1 | # GPT-2 (medium) Taboo
2 |
3 | ## What is it?
4 | A GPT-2 (medium) model fine-tuned for Taboo card generation.
5 |
6 | ## Training setting
7 |
8 | The model was trained on ~900 Taboo cards in the following format for 100 epochs:
9 | ```
10 | Describe the word Glitch without using the words Problem, Unexpected, Technology, Minor, Outage.
11 | ```
12 |
13 |
--------------------------------------------------------------------------------
/examples/distillation/training_configs/distilbert-base-uncased.json:
--------------------------------------------------------------------------------
1 | {
2 | "activation": "gelu",
3 | "attention_dropout": 0.1,
4 | "dim": 768,
5 | "dropout": 0.1,
6 | "hidden_dim": 3072,
7 | "initializer_range": 0.02,
8 | "max_position_embeddings": 512,
9 | "n_heads": 12,
10 | "n_layers": 6,
11 | "sinusoidal_pos_embds": true,
12 | "tie_weights_": true,
13 | "vocab_size": 30522
14 | }
15 |
--------------------------------------------------------------------------------
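A student configuration like the one above can be unpacked straight into `DistilBertConfig`; a sketch under the assumption that the JSON lives at the path shown and that extra keys are simply kept as plain config attributes:

```python
import json

from transformers import DistilBertConfig, DistilBertForMaskedLM

# Read the distillation training config and build a 6-layer student model from it.
with open("examples/distillation/training_configs/distilbert-base-uncased.json") as f:
    student_config = DistilBertConfig(**json.load(f))

student = DistilBertForMaskedLM(student_config)
print(student_config.n_layers, sum(p.numel() for p in student.parameters()))
```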
/src/transformers/commands/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from argparse import ArgumentParser
3 |
4 |
5 | class BaseTransformersCLICommand(ABC):
6 | @staticmethod
7 | @abstractmethod
8 | def register_subcommand(parser: ArgumentParser):
9 | raise NotImplementedError()
10 |
11 | @abstractmethod
12 | def run(self):
13 | raise NotImplementedError()
14 |
--------------------------------------------------------------------------------
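To illustrate the contract defined by `BaseTransformersCLICommand`, a hypothetical `hello` subcommand (purely illustrative, not part of the library) could look like this:

```python
from argparse import ArgumentParser, Namespace

from transformers.commands import BaseTransformersCLICommand


class HelloCommand(BaseTransformersCLICommand):
    """Toy subcommand illustrating the ABC above; not part of the library."""

    @staticmethod
    def register_subcommand(parser):
        # "parser" is the sub-parsers action created by ArgumentParser.add_subparsers().
        sub = parser.add_parser("hello", help="print a greeting")
        sub.add_argument("--name", default="world")
        sub.set_defaults(factory=lambda args: HelloCommand(args))

    def __init__(self, args: Namespace):
        self.args = args

    def run(self):
        print(f"hello, {self.args.name}")


if __name__ == "__main__":
    parser = ArgumentParser("demo-cli")
    HelloCommand.register_subcommand(parser.add_subparsers())
    args = parser.parse_args(["hello", "--name", "adapters"])
    args.factory(args).run()
```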
/examples/distillation/training_configs/distilbert-base-multilingual-cased.json:
--------------------------------------------------------------------------------
1 | {
2 | "activation": "gelu",
3 | "attention_dropout": 0.1,
4 | "dim": 768,
5 | "dropout": 0.1,
6 | "hidden_dim": 3072,
7 | "initializer_range": 0.02,
8 | "max_position_embeddings": 512,
9 | "n_heads": 12,
10 | "n_layers": 6,
11 | "sinusoidal_pos_embds": true,
12 | "tie_weights_": true,
13 | "vocab_size": 119547
14 | }
15 |
--------------------------------------------------------------------------------
/model_cards/lysandre/arxiv/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers from physics fields.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields.
8 |
--------------------------------------------------------------------------------
/src/transformers/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
3 | # module, but to preserve other warnings. So, don't check this module at all.
4 |
5 | from ..file_utils import is_torch_available
6 |
7 |
8 | if is_torch_available():
9 | from .benchmark_args import PyTorchBenchmarkArguments
10 | from .benchmark import PyTorchBenchmark
11 |
--------------------------------------------------------------------------------
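For orientation, the benchmark entry point is usually driven roughly as follows; the argument names (`models`, `batch_sizes`, `sequence_lengths`) are assumptions about this version's `PyTorchBenchmarkArguments`:

```python
from transformers.benchmark import PyTorchBenchmark, PyTorchBenchmarkArguments

# Benchmark inference speed/memory for one small model; the keyword names below
# are assumptions about this snapshot's argument dataclass.
args = PyTorchBenchmarkArguments(
    models=["distilbert-base-uncased"],
    batch_sizes=[1],
    sequence_lengths=[128],
)
benchmark = PyTorchBenchmark(args)
results = benchmark.run()  # prints and returns timing/memory results
```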
/model_cards/lysandre/arxiv-nlp/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv-NLP GPT-2 checkpoint
2 |
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` checkpoint fine-tuned on ArXiv papers from the computational linguistics field.
4 |
5 | ## Training data
6 |
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field.
--------------------------------------------------------------------------------
/examples/distillation/training_configs/distilroberta-base.json:
--------------------------------------------------------------------------------
1 | {
2 | "vocab_size": 50265,
3 | "hidden_size": 768,
4 | "num_hidden_layers": 6,
5 | "num_attention_heads": 12,
6 | "intermediate_size": 3072,
7 | "hidden_act": "gelu",
8 | "hidden_dropout_prob": 0.1,
9 | "attention_probs_dropout_prob": 0.1,
10 | "max_position_embeddings": 514,
11 | "type_vocab_size": 1,
12 | "initializer_range": 0.02,
13 | "layer_norm_eps": 0.00001
14 | }
--------------------------------------------------------------------------------
/model_cards/Hate-speech-CNERG/dehatebert-mono-arabic/README.md:
--------------------------------------------------------------------------------
1 | This model is used for detecting **hate speech** in the **Arabic language**. The "mono" in the name refers to the monolingual setting, where the model is trained using only Arabic-language data. It is fine-tuned from the multilingual BERT model.
2 | The model was trained with different learning rates, and the best validation score achieved is 0.8674776 at a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT)
3 |
--------------------------------------------------------------------------------
/model_cards/Hate-speech-CNERG/dehatebert-mono-english/README.md:
--------------------------------------------------------------------------------
1 | This model is used for detecting **hate speech** in the **English language**. The "mono" in the name refers to the monolingual setting, where the model is trained using only English-language data. It is fine-tuned from the multilingual BERT model.
2 | The model was trained with different learning rates, and the best validation score achieved is 0.7069374 at a learning rate of 2e-5. Training code can be found at this [url](https://github.com/punyajoy/DE-LIMIT)
3 |
--------------------------------------------------------------------------------
/docs/source/main_classes/configuration.rst:
--------------------------------------------------------------------------------
1 | Configuration
2 | ----------------------------------------------------
3 |
4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository).
5 |
6 | ``PretrainedConfig``
7 | ~~~~~~~~~~~~~~~~~~~~~
8 |
9 | .. autoclass:: transformers.PretrainedConfig
10 | :members:
11 |
--------------------------------------------------------------------------------
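As a concrete illustration of the loading/saving methods described above (the local directory name is arbitrary):

```python
from transformers import BertConfig

# Download a pretrained configuration, tweak it, and round-trip it through disk.
config = BertConfig.from_pretrained("bert-base-uncased")
config.output_attentions = True
config.save_pretrained("./my-bert-config")           # writes config.json
reloaded = BertConfig.from_pretrained("./my-bert-config")
assert reloaded.output_attentions is True
```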
/model_cards/jannesg/bertsson/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: swedish
3 | ---
4 |
5 | # BERTSSON Models
6 |
7 | The models are trained on:
8 | - Government Text
9 | - Swedish Literature
10 | - Swedish News
11 |
12 | Corpus size: Roughly 6B tokens.
13 |
14 | The following models are currently available:
15 |
16 | - **bertsson** - A BERT base model trained with the same hyperparameters as first published by Google.
17 |
18 | All models are cased and trained with whole word masking.
19 |
20 | Stay tuned for evaluations.
21 |
--------------------------------------------------------------------------------
/templates/adding_a_new_example_script/README.md:
--------------------------------------------------------------------------------
1 | # How to add a new example script in 🤗Transformers
2 |
3 | This folder provides a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library.
4 | 
5 | Currently, only PyTorch examples are provided. They are adaptations of the library's SQuAD examples, which implement single-GPU and distributed training with gradient accumulation and mixed precision (using NVIDIA's apex library) to cover a reasonable range of use cases.
6 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/new-adapter-setup.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F31F New adapter setup"
3 | about: Submit a proposal/request to implement a new adapter setup or to add a new model
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 | # 🌟 New adapter setup
11 |
12 | ## Model description
13 |
14 |
15 |
16 | ## Open source status
17 |
18 | * [ ] the model implementation is available: (give details)
19 | * [ ] the model weights are available: (give details)
20 | * [ ] who are the authors: (mention them, if possible by @gh-username)
21 |
--------------------------------------------------------------------------------
/adapter_docs/classes/model_mixins.rst:
--------------------------------------------------------------------------------
1 | Model Mixins
2 | =======================
3 |
4 | These classes provide the basis of adapter module integration into model classes, covering functionality such as adapter saving and loading.
5 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class.
6 |
7 | ModelAdaptersMixin
8 | ------------------
9 |
10 | .. autoclass:: transformers.ModelAdaptersMixin
11 | :members:
12 |
13 | ModelWithHeadsAdaptersMixin
14 | ---------------------------
15 |
16 | .. autoclass:: transformers.ModelWithHeadsAdaptersMixin
17 | :members:
18 |
--------------------------------------------------------------------------------
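The mixin methods are used directly on the model classes; a rough sketch only, where the exact signatures (in particular the `AdapterType` argument) are assumptions about this snapshot of the adapter API:

```python
from transformers import AdapterType, BertModel

# Assumed method signatures; the method names come from the mixins documented above.
model = BertModel.from_pretrained("bert-base-uncased")
model.add_adapter("example-task", AdapterType.text_task)      # register a new task adapter
model.save_adapter("./example-task-adapter", "example-task")  # serialize its weights + config
model.load_adapter("./example-task-adapter")                  # reload from the saved directory
```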
/examples/summarization/bart/run_train.sh:
--------------------------------------------------------------------------------
1 | export OUTPUT_DIR_NAME=bart_sum
2 | export CURRENT_DIR=${PWD}
3 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME}
4 |
5 | # Make output directory if it doesn't exist
6 | mkdir -p $OUTPUT_DIR
7 |
8 | # Add parent directory to python path to access lightning_base.py
9 | export PYTHONPATH="../../":"${PYTHONPATH}"
10 |
11 | python finetune.py \
12 | --data_dir=./cnn-dailymail/cnn_dm \
13 | --model_name_or_path=bart-large \
14 | --learning_rate=3e-5 \
15 | --train_batch_size=4 \
16 | --eval_batch_size=4 \
17 | --output_dir=$OUTPUT_DIR \
18 | --do_train $@
19 |
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/rubert-base-cased/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - russian
4 | ---
5 |
6 | # rubert-base-cased
7 |
8 | RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on the Russian part of Wikipedia and news data. We used this training data to build a vocabulary of Russian subtokens and took a multilingual version of BERT‑base as an initialization for RuBERT\[1\].
9 |
10 |
11 | \[1\]: Kuratov, Y., Arkhipov, M. \(2019\). Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language. arXiv preprint [arXiv:1905.07213](https://arxiv.org/abs/1905.07213).
12 |
--------------------------------------------------------------------------------
/tests/fixtures/hub-index.sample.json:
--------------------------------------------------------------------------------
1 | {
2 | "t": {
3 | "s": {
4 | "default": "path/to/default",
5 | "bb1c8efb82510bed": {
6 | "default": "path/to/pfeiffer/default",
7 | "versions": {
8 | "example-org": "path/to/pfeiffer/example-org",
9 | "ukp": "path/to/pfeiffer/ukp"
10 | }
11 | },
12 | "b1017368d7a97b11": {
13 | "versions": {
14 | "example-org": "path/to/houlsby/example-org"
15 | }
16 | }
17 | }
18 | }
19 | }
--------------------------------------------------------------------------------
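The nesting of this sample appears to be task -> subtask -> adapter-config hash -> version; a plain-JSON walk that resolves the entries (no library calls involved) might look like:

```python
import json

# Walk the sample index above: task -> subtask -> adapter-config hash -> version.
with open("tests/fixtures/hub-index.sample.json") as f:
    index = json.load(f)

subtask = index["t"]["s"]
print(subtask["default"])                              # path/to/default
print(subtask["bb1c8efb82510bed"]["default"])          # path/to/pfeiffer/default
print(subtask["bb1c8efb82510bed"]["versions"]["ukp"])  # path/to/pfeiffer/ukp
```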
/src/transformers/data/processors/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
3 | # module, but to preserve other warnings. So, don't check this module at all.
4 |
5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels
6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features
7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor
8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels
9 |
--------------------------------------------------------------------------------
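A minimal sketch of how these processor utilities fit together, assuming the tiny MRPC fixture from `tests/fixtures/tests_samples/MRPC` and this era's `glue_convert_examples_to_features` signature:

```python
from transformers import AutoTokenizer
from transformers.data.processors import glue_convert_examples_to_features, glue_processors

# Read the tiny MRPC fixture shipped with the tests (path taken from the tree above).
processor = glue_processors["mrpc"]()
examples = processor.get_dev_examples("tests/fixtures/tests_samples/MRPC")

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
features = glue_convert_examples_to_features(examples, tokenizer, max_length=128, task="mrpc")
print(len(features), features[0].input_ids[:5])
```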
/examples/text-generation/README.md:
--------------------------------------------------------------------------------
1 | ## Language generation
2 |
3 | Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py).
4 |
5 | Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL, XLNet, CTRL.
6 | A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
7 | can try out the different models available in the library.
8 |
9 | Example usage:
10 |
11 | ```bash
12 | python run_generation.py \
13 | --model_type=gpt2 \
14 | --model_name_or_path=gpt2
15 | ```
16 |
--------------------------------------------------------------------------------
/src/transformers/trainer_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, NamedTuple, Optional
2 |
3 | import numpy as np
4 |
5 |
6 | class EvalPrediction(NamedTuple):
7 | """
8 | Evaluation output (always contains labels), to be used
9 | to compute metrics.
10 | """
11 |
12 | predictions: np.ndarray
13 | label_ids: np.ndarray
14 |
15 |
16 | class PredictionOutput(NamedTuple):
17 | predictions: np.ndarray
18 | label_ids: Optional[np.ndarray]
19 | metrics: Optional[Dict[str, float]]
20 |
21 |
22 | class TrainOutput(NamedTuple):
23 | global_step: int
24 | training_loss: float
25 |
26 |
27 | PREFIX_CHECKPOINT_DIR = "checkpoint"
28 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = _build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/deploy_multi_version_doc.sh:
--------------------------------------------------------------------------------
1 | cd docs
2 |
3 | function deploy_doc(){
4 | echo "Creating doc at commit $1 and pushing to folder $2"
5 | git checkout $1
6 | if [ ! -z "$2" ]
7 | then
8 | echo "Pushing version" $2
9 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2
10 | else
11 | echo "Pushing master"
12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir
13 | fi
14 | }
15 |
16 | deploy_doc "master"
17 | deploy_doc "b33a385" v1.0.0
18 | deploy_doc "fe02e45" v1.1.0
19 | deploy_doc "89fd345" v1.2.0
20 | deploy_doc "fc9faa8" v2.0.0
21 | deploy_doc "3ddce1d" v2.1.1
22 | deploy_doc "f2f3294" v2.2.0
23 | deploy_doc "d0f8b9a" v2.3.0
24 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/bert-imdb/README.md:
--------------------------------------------------------------------------------
1 | # BERT-IMDB
2 |
3 | ## What is it?
4 | BERT (`bert-large-cased`) trained for sentiment classification on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The model was trained on 80% of the IMDB dataset for sentiment classification for three epochs with a learning rate of `1e-5` with the `simpletransformers` library. The library uses a learning rate schedule.
9 |
10 | ## Result
11 | The model achieved 90% classification accuracy on the validation set.
12 |
13 | ## Reference
14 | The full experiment is available in the [trl repo](https://lvwerra.github.io/trl/03-bert-imdb-training/).
15 |
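16 | As a rough illustration (assuming the checkpoint is available on the model hub as `lvwerra/bert-imdb`), the classifier can be queried through the sentiment-analysis pipeline:
17 | 
18 | ```python
19 | from transformers import pipeline
20 | 
21 | classifier = pipeline("sentiment-analysis", model="lvwerra/bert-imdb", tokenizer="lvwerra/bert-imdb")
22 | print(classifier("This movie was an absolute masterpiece."))
23 | # e.g. [{'label': ..., 'score': ...}], with label names as stored in the model config
24 | ```
25 | 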
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/bert-base-bg-cs-pl-ru-cased/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - bulgarian
4 | - czech
5 | - polish
6 | - russian
7 | ---
8 |
9 | # bert-base-bg-cs-pl-ru-cased
10 |
11 | SlavicBERT\[1\] \(Slavic \(bg, cs, pl, ru\), cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on Russian News and four Wikipedias: Bulgarian, Czech, Polish, and Russian. The subtoken vocabulary was built using this data. Multilingual BERT was used as an initialization for SlavicBERT.
12 |
13 |
14 | \[1\]: Arkhipov M., Trofimova M., Kuratov Y., Sorokin A. \(2019\). [Tuning Multilingual Transformers for Language-Specific Named Entity Recognition](https://www.aclweb.org/anthology/W19-3712/). ACL anthology W19-3712.
15 |
--------------------------------------------------------------------------------
/adapter_docs/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* The search field on top of the toc tree */
2 | /* Mobile header */
3 | .wy-side-nav-search, .wy-nav-top {
4 | background: #39B3C6;
5 | }
6 | /* toc tree text */
7 | .wy-menu-vertical header,
8 | .wy-menu-vertical p.caption {
9 | color: #39B3C6
10 | }
11 | /* toc tree activated link */
12 | .wy-menu-vertical a:active {
13 | background-color:#39B3C6;
14 | }
15 | /* Links */
16 | a {
17 | color: #39B3C6
18 | }
19 | /* Source spans */
20 | .rst-content .viewcode-link, .rst-content .viewcode-back{
21 | color: #39B3C6;
22 | }
23 | /* The literal code blocks */
24 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
25 | color: #39B3C6;
26 | }
27 |
--------------------------------------------------------------------------------
/adapter_docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docker/transformers-pytorch-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | jupyter \
18 | torch
19 |
20 | WORKDIR /workspace
21 | COPY . transformers/
22 | RUN cd transformers/ && \
23 | python3 -m pip install --no-cache-dir .
24 |
25 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-tensorflow-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | mkl \
18 | tensorflow-cpu
19 |
20 | WORKDIR /workspace
21 | COPY . transformers/
22 | RUN cd transformers/ && \
23 | python3 -m pip install --no-cache-dir .
24 |
25 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | jupyter \
18 | tensorflow-cpu \
19 | torch
20 |
21 | WORKDIR /workspace
22 | COPY . transformers/
23 | RUN cd transformers/ && \
24 | python3 -m pip install --no-cache-dir .
25 |
26 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/docker/transformers-pytorch-gpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | mkl \
18 | torch
19 |
20 | WORKDIR /workspace
21 | COPY . transformers/
22 | RUN cd transformers/ && \
23 | python3 -m pip install --no-cache-dir .
24 |
25 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/adapter_docs/README.md:
--------------------------------------------------------------------------------
1 | # The adapter-transformers documentation
2 |
3 | This is the documentation of the adapter-related parts of the transformers library and the Adapter-Hub. Huggingface's documentation of the base library is located in the `/docs` folder.
4 |
5 | ## Installing & Building
6 |
7 | Building the documentation requires some additional packages to be installed. You can install them by running the following command in the root folder:
8 |
9 | ```bash
10 | pip install -e ".[docs]"
11 | ```
12 |
13 | Cleaning and regenerating the documentation files can be done using `sphinx` by running the following command in the `/adapter_docs` folder:
14 |
15 | ```bash
16 | make clean && make html
17 | ```
18 |
19 | The build output will be located in `/adapter_docs/_build/html`.
20 |
--------------------------------------------------------------------------------
/docker/transformers-tensorflow-gpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | mkl \
18 | tensorflow
19 |
20 | WORKDIR /workspace
21 | COPY . transformers/
22 | RUN cd transformers/ && \
23 | python3 -m pip install --no-cache-dir .
24 |
25 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/examples/text-generation/pplm/pplm_classification_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class ClassificationHead(torch.nn.Module):
5 | """Classification Head for transformer encoders"""
6 |
7 | def __init__(self, class_size, embed_size):
8 | super().__init__()
9 | self.class_size = class_size
10 | self.embed_size = embed_size
11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size)
12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size))
13 | self.mlp = torch.nn.Linear(embed_size, class_size)
14 |
15 | def forward(self, hidden_state):
16 | # hidden_state = F.relu(self.mlp1(hidden_state))
17 | # hidden_state = self.mlp2(hidden_state)
18 | logits = self.mlp(hidden_state)
19 | return logits
20 |
--------------------------------------------------------------------------------
/examples/contrib/mm-imdb/README.md:
--------------------------------------------------------------------------------
1 | ## MM-IMDb
2 |
3 | Based on the script [`run_mmimdb.py`](https://github.com/huggingface/transformers/blob/master/examples/contrib/mm-imdb/run_mmimdb.py).
4 |
5 | [MM-IMDb](http://lisi1.unal.edu.co/mmimdb/) is a Multimodal dataset with around 26,000 movies including images, plots and other metadata.
6 |
7 | ### Training on MM-IMDb
8 |
9 | ```bash
10 | python run_mmimdb.py \
11 | --data_dir /path/to/mmimdb/dataset/ \
12 | --model_type bert \
13 | --model_name_or_path bert-base-uncased \
14 | --output_dir /path/to/save/dir/ \
15 | --do_train \
16 | --do_eval \
17 | --max_seq_len 512 \
18 | --gradient_accumulation_steps 20 \
19 | --num_image_embeds 3 \
20 | --num_train_epochs 100 \
21 | --patience 5
22 | ```
23 |
24 |
--------------------------------------------------------------------------------
/docker/transformers-gpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="transformers"
4 |
5 | RUN apt update && \
6 | apt install -y bash \
7 | build-essential \
8 | git \
9 | curl \
10 | ca-certificates \
11 | python3 \
12 | python3-pip && \
13 | rm -rf /var/lib/apt/lists
14 |
15 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
16 | python3 -m pip install --no-cache-dir \
17 | jupyter \
18 | tensorflow \
19 | torch
20 |
21 | WORKDIR /workspace
22 | COPY . transformers/
23 | RUN cd transformers/ && \
24 | python3 -m pip install --no-cache-dir .
25 |
26 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | ensure_newline_before_comments = True
3 | force_grid_wrap = 0
4 | include_trailing_comma = True
5 | known_first_party = transformers
6 | known_third_party =
7 | absl
8 | fairseq
9 | fastprogress
10 | git
11 | h5py
12 | MeCab
13 | nltk
14 | numpy
15 | packaging
16 | PIL
17 | psutil
18 | pytorch_lightning
19 | rouge_score
20 | sacrebleu
21 | seqeval
22 | sklearn
23 | tensorboardX
24 | tensorflow
25 | tensorflow_datasets
26 | timeout_decorator
27 | torch
28 | torchtext
29 | torchvision
30 | torch_xla
31 | tqdm
32 |
33 | line_length = 119
34 | lines_after_imports = 2
35 | multi_line_output = 3
36 | use_parentheses = True
37 |
38 | [flake8]
39 | ignore = E203, E501, E741, W503
40 | max-line-length = 119
41 |
--------------------------------------------------------------------------------
/model_cards/julien-c/bert-xsmall-dummy/README.md:
--------------------------------------------------------------------------------
1 | ## How to build a dummy model
2 |
3 |
4 | ```python
5 | from transformers.configuration_bert import BertConfig
6 | from transformers.modeling_bert import BertForMaskedLM
7 | from transformers.modeling_tf_bert import TFBertForMaskedLM
8 | from transformers.tokenization_bert import BertTokenizer
9 |
10 |
11 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"
12 | DIRNAME = "./bert-xsmall-dummy"
13 |
14 | config = BertConfig(10, 20, 1, 1, 40)
15 |
16 | model = BertForMaskedLM(config)
17 | model.save_pretrained(DIRNAME)
18 |
19 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
20 | tf_model.save_pretrained(DIRNAME)
21 |
22 | # Slightly different for tokenizer.
23 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME)
24 | # tokenizer.save_pretrained()
25 | ```
26 |
--------------------------------------------------------------------------------
/model_cards/spentaur/yelp/README.md:
--------------------------------------------------------------------------------
1 | # DistilBERT Yelp Review Sentiment
2 | This model is used for sentiment analysis on English Yelp reviews.
3 | It is a DistilBERT model trained on 1 million reviews from the Yelp Open Dataset.
4 | It is a regression model, with outputs in the range of roughly -2 to 2, where -2 corresponds to 1 star and 2 to 5 stars.
5 | It was trained using the [ktrain](https://github.com/amaiya/ktrain) library because of its ease of use.
6 |
7 | Example use:
8 |
9 | ```python
10 | from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
11 | 
12 | tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', use_fast=True)
13 | model = TFAutoModelForSequenceClassification.from_pretrained("spentaur/yelp")
14 | 
15 | review = "This place is great!"
16 | input_ids = tokenizer.encode(review, return_tensors='tf')
17 | pred = model(input_ids)[0][0][0].numpy()
18 | # pred should be approximately 1.9562385
19 | ```
20 |
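21 | Since the output is a regression score in roughly [-2, 2], a simple linear mapping to a 1-5 star rating (an illustrative assumption, not part of the original training setup) is:
22 | 
23 | ```python
24 | def score_to_stars(pred: float) -> float:
25 |     """Map a raw model score in [-2, 2] linearly onto a 1-5 star scale."""
26 |     return pred + 3.0
27 | 
28 | print(score_to_stars(1.9562385))  # ~4.96 stars
29 | ```
30 | 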
--------------------------------------------------------------------------------
/.circleci/deploy.sh:
--------------------------------------------------------------------------------
1 | cd docs
2 |
3 | function deploy_doc(){
4 | echo "Creating doc at commit $1 and pushing to folder $2"
5 | git checkout $1
6 | if [ ! -z "$2" ]
7 | then
8 | if [ -d "$dir/$2" ]; then
9 | echo "Directory" $2 "already exists"
10 | else
11 | echo "Pushing version" $2
12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2
13 | fi
14 | else
15 | echo "Pushing master"
16 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir
17 | fi
18 | }
19 |
20 | deploy_doc "master"
21 | deploy_doc "b33a385" v1.0.0
22 | deploy_doc "fe02e45" v1.1.0
23 | deploy_doc "89fd345" v1.2.0
24 | deploy_doc "fc9faa8" v2.0.0
25 | deploy_doc "3ddce1d" v2.1.1
26 | deploy_doc "3616209" v2.2.0
27 | deploy_doc "d0f8b9a" v2.3.0
28 | deploy_doc "6664ea9" v2.4.0
29 | deploy_doc "fb560dc" v2.5.0
30 |
--------------------------------------------------------------------------------
/adapter_docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | Our *adapter-transformers* package is a drop-in replacement for Huggingface's *transformers* library. Like the original package, it is tested on Python 3.6+ and PyTorch 1.1.0+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.
4 |
5 | ## Using pip (from GitHub)
6 |
7 | The simplest way to install the package is using pip to install it directly from our GitHub repository:
8 |
9 | ```bash
10 | pip install git+https://github.com/adapter-hub/adapter-transformers.git
11 | ```
12 |
13 | ## From repository
14 |
15 | Alternatively, you can clone the repository first and install the package from source.
16 | This allows you to run the included example scripts:
17 |
18 | ```bash
19 | git clone https://github.com/adapter-hub/adapter-transformers.git
20 | cd adapter-transformers
21 | pip install .
22 | ```
23 |
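24 | To verify the installation, a quick sanity check (a minimal sketch, mirroring what the test suite does) is to load a pretrained model and add an adapter to it:
25 | 
26 | ```python
27 | from transformers import AdapterType, BertModel
28 | 
29 | model = BertModel.from_pretrained("bert-base-uncased")
30 | # Adding a task adapter should complete without errors if the installation succeeded
31 | model.add_adapter("sanity-check", AdapterType.text_task)
32 | print("adapter-transformers is installed correctly")
33 | ```
34 | 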
--------------------------------------------------------------------------------
/src/transformers/data/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
3 | # module, but to preserve other warnings. So, don't check this module at all.
4 |
5 | from .metrics import is_sklearn_available
6 | from .processors import (
7 | DataProcessor,
8 | InputExample,
9 | InputFeatures,
10 | SingleSentenceClassificationProcessor,
11 | SquadExample,
12 | SquadFeatures,
13 | SquadV1Processor,
14 | SquadV2Processor,
15 | glue_convert_examples_to_features,
16 | glue_output_modes,
17 | glue_processors,
18 | glue_tasks_num_labels,
19 | squad_convert_examples_to_features,
20 | xnli_output_modes,
21 | xnli_processors,
22 | xnli_tasks_num_labels,
23 | )
24 |
25 |
26 | if is_sklearn_available():
27 | from .metrics import glue_compute_metrics, xnli_compute_metrics
28 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-imdb/README.md:
--------------------------------------------------------------------------------
1 | # GPT2-IMDB
2 |
3 | ## What is it?
4 | A GPT2 (`gpt2`) language model fine-tuned on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews).
5 |
6 | ## Training setting
7 |
8 | The GPT2 language model was fine-tuned for 1 epoch on the IMDB dataset. All comments were joined into a single text file separated by the EOS token:
9 |
10 | ```
11 | import pandas as pd
12 | df = pd.read_csv("imdb-dataset.csv")
13 | imdb_str = " <|endoftext|> ".join(df['review'].tolist())
14 |
15 | with open('imdb.txt', 'w') as f:
16 | f.write(imdb_str)
17 | ```
18 |
19 | To train the model, the `run_language_modeling.py` script from the `transformers` library was used:
20 |
21 | ```
22 | python run_language_modeling.py \
23 | --train_data_file imdb.txt \
24 | --output_dir gpt2-imdb \
25 | --model_type gpt2 \
26 | --model_name_or_path gpt2
27 | ```
28 |
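29 | Assuming the fine-tuned weights are available on the model hub as `lvwerra/gpt2-imdb`, review-like text can then be sampled with a short sketch like the following:
30 | 
31 | ```
32 | from transformers import GPT2LMHeadModel, GPT2Tokenizer
33 | 
34 | tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-imdb")
35 | model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-imdb")
36 | 
37 | input_ids = tokenizer.encode("This movie was", return_tensors="pt")
38 | output_ids = model.generate(input_ids, max_length=40, do_sample=True, top_k=50)
39 | print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
40 | ```
41 | 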
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F680 Feature request"
3 | about: Submit a proposal/request for a new adapter-transformers feature
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 | # 🚀 Feature request
11 |
12 |
14 |
15 | ## Motivation
16 |
17 |
20 |
21 | ## Your contribution
22 |
23 |
26 |
--------------------------------------------------------------------------------
/adapter_docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/model_cards/allenai/longformer-base-4096-extra.pos.embd.only/README.md:
--------------------------------------------------------------------------------
1 |
2 | # longformer-base-4096-extra.pos.embd.only
3 |
4 | This model is similar to `longformer-base-4096` but it was pretrained to preserve RoBERTa weights by freezing all RoBERTa weights and training only the additional position embeddings.
5 |
6 |
7 | ### Citing
8 |
9 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150).
10 | ```
11 | @article{Beltagy2020Longformer,
12 | title={Longformer: The Long-Document Transformer},
13 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan},
14 | journal={arXiv:2004.05150},
15 | year={2020},
16 | }
17 | ```
18 |
19 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org).
20 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.
21 |
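22 | A minimal loading sketch (assuming the checkpoint is published under the name of this model card):
23 | 
24 | ```python
25 | from transformers import LongformerTokenizer, LongformerModel
26 | 
27 | tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")
28 | model = LongformerModel.from_pretrained("allenai/longformer-base-4096-extra.pos.embd.only")
29 | 
30 | # Sequences of up to 4096 tokens are supported thanks to the additional position embeddings
31 | input_ids = tokenizer.encode("Long documents go here.", return_tensors="pt")
32 | outputs = model(input_ids)
33 | print(outputs[0].shape)
34 | ```
35 | 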
--------------------------------------------------------------------------------
/docs/source/main_classes/model.rst:
--------------------------------------------------------------------------------
1 | Models
2 | ----------------------------------------------------
3 |
4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository).
5 |
6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to:
7 |
8 | - resize the input token embeddings when new tokens are added to the vocabulary
9 | - prune the attention heads of the model.
10 |
11 | ``PreTrainedModel``
12 | ~~~~~~~~~~~~~~~~~~~~~
13 |
14 | .. autoclass:: transformers.PreTrainedModel
15 | :members:
16 |
17 | ``Helper Functions``
18 | ~~~~~~~~~~~~~~~~~~~~~
19 |
20 | .. autofunction:: transformers.apply_chunking_to_forward
21 |
22 |
23 | ``TFPreTrainedModel``
24 | ~~~~~~~~~~~~~~~~~~~~~
25 |
26 | .. autoclass:: transformers.TFPreTrainedModel
27 | :members:
28 |
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/rubert-base-cased-conversational/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - russian
4 | ---
5 |
6 | # rubert-base-cased-conversational
7 |
8 | Conversational RuBERT \(Russian, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) was trained on OpenSubtitles\[1\], [Dirty](https://d3.ru/), [Pikabu](https://pikabu.ru/), and the Social Media segment of the Taiga corpus\[2\]. We assembled a new vocabulary for the Conversational RuBERT model on this data and initialized the model with [RuBERT](../rubert-base-cased).
9 |
10 |
11 | \[1\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\)
12 |
13 | \[2\]: Shavrina T., Shapovalova O. \(2017\) To the Methodology of Corpus Construction for Machine Learning: «Taiga» Syntax Tree Corpus and Parser. In Proc. of “CORPORA2017”, International Conference, Saint Petersburg, 2017.
14 |
--------------------------------------------------------------------------------
/tests/test_adapter_saving.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from transformers import ADAPTER_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel
4 |
5 | from .utils import require_torch
6 |
7 |
8 | @require_torch
9 | class AdapterModelTest(unittest.TestCase):
10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel]
11 |
12 | def test_model_config_serialization(self):
13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict
14 |
15 | See, e.g., PretrainedConfig.to_json_string()
16 | """
17 | for model_class in self.model_classes:
18 | for k, v in ADAPTER_CONFIG_MAP.items():
19 | model_config = model_class.config_class
20 | model = model_class(model_config())
21 | model.add_adapter("test", adapter_type=AdapterType.text_task, config=v)
22 | # should not raise an exception
23 | model.config.to_json_string()
24 |
--------------------------------------------------------------------------------
/.github/workflows/adapter_docs_build.yml:
--------------------------------------------------------------------------------
1 | name: Build Adapter Docs
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | paths: [ 'adapter_docs/**' ]
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | with:
14 | submodules: recursive
15 | - uses: actions/setup-python@v2
16 | with:
17 | python-version: 3.6
18 | - name: Install
19 | run: |
20 | pip install .[tf,torch,docs]
21 | - name: Build
22 | run: |
23 | cd adapter_docs && make html && cd ..
24 | - name: Deploy
25 | uses: peaceiris/actions-gh-pages@v3
26 | with:
27 | github_token: ${{ secrets.GITHUB_TOKEN }}
28 | user_name: "Adapter-Hub-Bert"
29 | user_email: "---"
30 | publish_dir: ./adapter_docs/_build/html
31 | publish_branch: gh-pages
32 | force_orphan: true
33 | cname: docs.adapterhub.ml
34 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: quality style test test-examples
2 |
3 | # Check that source code meets quality standards
4 |
5 | quality:
6 | black --check --line-length 119 --target-version py35 examples templates tests src utils
7 | isort --check-only --recursive examples templates tests src utils
8 | flake8 examples templates tests src utils
9 |
10 | # Format source code automatically
11 |
12 | style:
13 | black --line-length 119 --target-version py35 examples templates tests src utils
14 | isort --recursive examples templates tests src utils
15 |
16 | # Run tests for the library
17 |
18 | test:
19 | python -m pytest -n auto --dist=loadfile -s -v ./tests/
20 |
21 | test-reduced:
22 | python -m pytest -n auto --dist=loadfile -s -v\
23 | --ignore-glob='tests/test_tokenization*'\
24 | --ignore-glob='tests/test_pipelines*'\
25 | --ignore-glob='tests/test_hf*'\
26 | --ignore-glob='tests/test_doc*'\
27 | ./tests/
28 |
29 | # Run tests for examples
30 |
31 | test-examples:
32 | python -m pytest -n auto --dist=loadfile -s -v ./examples/
33 |
--------------------------------------------------------------------------------
/model_cards/codegram/calbert-base-uncased/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: catalan
3 | ---
4 |
5 | # CALBERT: a Catalan Language Model
6 |
7 | ## Introduction
8 |
9 | CALBERT is an open-source language model for Catalan based on the ALBERT architecture.
10 |
11 | It is now available on Hugging Face in its `base-uncased` version, and was pretrained on the [OSCAR dataset](https://traces1.inria.fr/oscar/).
12 |
13 | For further information or requests, please go to the [GitHub repository](https://github.com/codegram/calbert)
14 |
15 | ## Pre-trained models
16 |
17 | | Model | Arch. | Training data |
18 | |-------------------------------------|------------------|-----------------------------------|
19 | | `codegram` / `calbert-base-uncased` | Base (uncased) | OSCAR (4.3 GB of text) |
20 |
21 |
22 | ## Authors
23 |
24 | CALBERT was trained and evaluated by [Txus Bach](https://twitter.com/txustice), as part of [Codegram](https://www.codegram.com)'s applied research.
25 |
26 |
--------------------------------------------------------------------------------
/examples/summarization/bart/run_train_tiny.sh:
--------------------------------------------------------------------------------
1 | # Script for verifying that run_bart_sum can be invoked from its directory
2 |
3 | # Get tiny dataset with cnn_dm format (4 examples for train, val, test)
4 | wget https://s3.amazonaws.com/datasets.huggingface.co/summarization/cnn_tiny.tgz
5 | tar -xzvf cnn_tiny.tgz
6 | rm cnn_tiny.tgz
7 |
8 | export OUTPUT_DIR_NAME=bart_utest_output
9 | export CURRENT_DIR=${PWD}
10 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME}
11 |
12 | # Make output directory if it doesn't exist
13 | mkdir -p $OUTPUT_DIR
14 |
15 | # Add parent directory to python path to access lightning_base.py and utils.py
16 | export PYTHONPATH="../../":"${PYTHONPATH}"
17 | python finetune.py \
18 | --data_dir=cnn_tiny/ \
19 | --model_type=bart \
20 | --model_name_or_path=sshleifer/bart-tiny-random \
21 | --learning_rate=3e-5 \
22 | --train_batch_size=2 \
23 | --eval_batch_size=2 \
24 | --output_dir=$OUTPUT_DIR \
25 | --num_train_epochs=1 \
26 | --n_gpu=0 \
27 | --do_train $@
28 |
29 | rm -rf cnn_tiny
30 | rm -rf $OUTPUT_DIR
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/rubert-base-cased-sentence/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - russian
4 | ---
5 |
6 | # rubert-base-cased-sentence
7 |
8 | Sentence RuBERT \(Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters\) is a representation‑based sentence encoder for Russian. It is initialized with RuBERT and fine‑tuned on SNLI\[1\], Google-translated to Russian, and on the Russian part of the XNLI dev set\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\].
9 |
10 |
11 | \[1\]: S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. \(2015\) A large annotated corpus for learning natural language inference. arXiv preprint [arXiv:1508.05326](https://arxiv.org/abs/1508.05326)
12 |
13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053)
14 |
15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084)
16 |
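17 | A minimal sketch of the mean pooling described above \(for illustration only, assuming the model can be loaded through the Auto classes under the `DeepPavlov/rubert-base-cased-sentence` identifier\):
18 | 
19 | ```python
20 | from transformers import AutoTokenizer, AutoModel
21 | 
22 | tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
23 | model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
24 | 
25 | input_ids = tokenizer.encode("Это пример предложения.", return_tensors="pt")
26 | token_embeddings = model(input_ids)[0]              # (1, sequence_length, 768)
27 | sentence_embedding = token_embeddings.mean(dim=1)   # mean pooled sentence vector, (1, 768)
28 | # For batched, padded inputs the padding tokens should be masked out before averaging.
29 | print(sentence_embedding.shape)
30 | ```
31 | 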
--------------------------------------------------------------------------------
/tests/test_activations.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from transformers import is_torch_available
4 |
5 | from .utils import require_torch
6 |
7 |
8 | if is_torch_available():
9 | from transformers.activations import _gelu_python, get_activation, gelu_new
10 | import torch
11 |
12 |
13 | @require_torch
14 | class TestActivations(unittest.TestCase):
15 | def test_gelu_versions(self):
16 | x = torch.Tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
17 | torch_builtin = get_activation("gelu")
18 | self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item())
19 | self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item())
20 |
21 | def test_get_activation(self):
22 | get_activation("swish")
23 | get_activation("relu")
24 | get_activation("tanh")
25 | get_activation("gelu_new")
26 | get_activation("gelu_fast")
27 | with self.assertRaises(KeyError):
28 | get_activation("bogus")
29 | with self.assertRaises(KeyError):
30 | get_activation(None)
31 |
--------------------------------------------------------------------------------
/src/transformers/configuration_marian.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The OPUS-NMT Team, Marian team, and The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """ Marian model configuration """
16 |
17 | from .configuration_bart import BartConfig
18 |
19 |
20 | PRETRAINED_CONFIG_ARCHIVE_MAP = {
21 | "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
22 | }
23 |
24 |
25 | class MarianConfig(BartConfig):
26 | model_type = "marian"
27 |
--------------------------------------------------------------------------------
/model_cards/clue/xlnet_chinese_large/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
5 | ## xlnet_chinese_large
6 |
7 | ### Overview
8 |
9 | **Language model:** xlnet-large
10 | **Model size:** 1.3G
11 | **Language:** Chinese
12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020)
13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE)
14 |
15 | ### Results
16 |
17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE).
18 |
19 | ### Usage
20 |
21 | ```
22 | import torch
23 | from transformers import XLNetTokenizer,XLNetModel
24 | tokenizer = XLNetTokenizer.from_pretrained("clue/xlnet_chinese_large")
25 | xlnet = XLNetModel.from_pretrained("clue/xlnet_chinese_large")
26 | ```
27 |
28 | ### About CLUE benchmark
29 |
30 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard.
31 |
32 | Github: https://github.com/CLUEbenchmark
33 | Website: https://www.cluebenchmarks.com/
34 |
--------------------------------------------------------------------------------
/adapter_docs/classes/weights_loaders.rst:
--------------------------------------------------------------------------------
1 | Weights Loaders
2 | =======================
3 |
4 | These classes perform the extraction, saving and loading of module weights to and from the file system.
5 | All type-specific loader classes inherit from the common ``WeightsLoader`` base class which can also be extended
6 | to add support for additional custom modules.
7 |
8 | These classes provide the basis for integrating adapter modules into model classes, e.g. for adapter saving and loading.
9 | Depending on the model, one of these mixins should be implemented by every adapter-supporting model class.
10 |
11 | WeightsLoader
12 | ------------------
13 |
14 | .. autoclass:: transformers.WeightsLoader
15 | :members:
16 |
17 | AdapterLoader
18 | ---------------------------
19 |
20 | .. autoclass:: transformers.AdapterLoader
21 | :members:
22 |
23 | PredictionHeadLoader
24 | ---------------------------
25 |
26 | .. autoclass:: transformers.PredictionHeadLoader
27 | :members:
28 |
29 | WeightsLoaderHelper
30 | -------------------
31 |
32 | .. autoclass:: transformers.WeightsLoaderHelper
33 | :members:
34 |
--------------------------------------------------------------------------------
/model_cards/ViktorAlm/electra-base-norwegian-uncased-discriminator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: norwegian
3 | thumbnail: https://i.imgur.com/QqSEC5I.png
4 | ---
5 |
6 | # Norwegian Electra
7 | 
8 |
9 | Trained on OSCAR + Wikipedia + OpenSubtitles + some other data I had, with the awesome power of TPUs (v3-8).
10 | 
11 | Use with caution. I have no downstream tasks in Norwegian to test on, so I have no idea of its performance yet.
12 | # Model
13 | ## Electra: Pre-training Text Encoders as Discriminators Rather Than Generators
14 | Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning
15 | - https://openreview.net/pdf?id=r1xMH1BtvB
16 | - https://github.com/google-research/electra
17 | # Acknowledgments
18 | ### TensorFlow Research Cloud
19 | Research supported with Cloud TPUs from Google's TensorFlow Research Cloud (TFRC). Thanks for providing access to the TFRC ❤️
20 | - https://www.tensorflow.org/tfrc
21 | #### OSCAR corpus
22 | - https://oscar-corpus.com/
23 | #### OPUS
24 | - http://opus.nlpl.eu/
25 | - http://www.opensubtitles.org/
26 |
--------------------------------------------------------------------------------
/src/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import torch
5 |
6 | from transformers.file_utils import WEIGHTS_NAME
7 |
8 |
9 | DIALOGPT_MODELS = ["small", "medium", "large"]
10 |
11 | OLD_KEY = "lm_head.decoder.weight"
12 | NEW_KEY = "lm_head.weight"
13 |
14 |
15 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
16 | d = torch.load(checkpoint_path)
17 | d[NEW_KEY] = d.pop(OLD_KEY)
18 | os.makedirs(pytorch_dump_folder_path, exist_ok=True)
19 | torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
20 |
21 |
22 | if __name__ == "__main__":
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument("--dialogpt_path", default=".", type=str)
25 | args = parser.parse_args()
26 | for MODEL in DIALOGPT_MODELS:
27 | checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
28 | pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
29 | convert_dialogpt_checkpoint(
30 | checkpoint_path, pytorch_dump_folder_path,
31 | )
32 |
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/bert-base-multilingual-cased-sentence/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - multilingual
4 | ---
5 |
6 | # bert-base-multilingual-cased-sentence
7 |
8 | Sentence Multilingual BERT \(101 languages, cased, 12‑layer, 768‑hidden, 12‑heads, 180M parameters\) is a representation‑based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine‑tuned on English MultiNLI\[1\] and on the dev set of multilingual XNLI\[2\]. Sentence representations are mean pooled token embeddings in the same manner as in Sentence‑BERT\[3\].
9 |
10 |
11 | \[1\]: Williams A., Nangia N. & Bowman S. \(2017\) A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference. arXiv preprint [arXiv:1704.05426](https://arxiv.org/abs/1704.05426)
12 |
13 | \[2\]: Williams A., Bowman S. \(2018\) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint [arXiv:1809.05053](https://arxiv.org/abs/1809.05053)
14 |
15 | \[3\]: N. Reimers, I. Gurevych \(2019\) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint [arXiv:1908.10084](https://arxiv.org/abs/1908.10084)
16 |
--------------------------------------------------------------------------------
/tests/test_adapter_fusion_saving.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterType, BertModel, RobertaModel, XLMRobertaModel
4 |
5 | from .utils import require_torch
6 |
7 |
8 | @require_torch
9 | class AdapterFusionModelTest(unittest.TestCase):
10 | model_classes = [BertModel, RobertaModel, XLMRobertaModel]
11 |
12 | def test_model_config_serialization(self):
13 | """PretrainedConfigurations should not raise an Exception when serializing the config dict
14 |
15 | See, e.g., PretrainedConfig.to_json_string()
16 | """
17 | for model_class in self.model_classes:
18 | for k, v in ADAPTERFUSION_CONFIG_MAP.items():
19 | model_config = model_class.config_class
20 | model = model_class(model_config())
21 | model.add_adapter("test1", AdapterType.text_task)
22 | model.add_adapter("test2", AdapterType.text_task)
23 | model.add_fusion(["test1", "test2"], adapter_fusion_config=v)
24 | # should not raise an exception
25 | model.config.to_json_string()
26 |
--------------------------------------------------------------------------------
/examples/token-classification/test_ner_examples.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 | import unittest
4 | from unittest.mock import patch
5 |
6 | import run_ner
7 |
8 |
9 | logging.basicConfig(level=logging.INFO)
10 |
11 | logger = logging.getLogger()
12 |
13 |
14 | class ExamplesTests(unittest.TestCase):
15 | def test_run_ner(self):
16 | stream_handler = logging.StreamHandler(sys.stdout)
17 | logger.addHandler(stream_handler)
18 |
19 | testargs = """
20 | --model_name distilbert-base-german-cased
21 | --output_dir ./tests/fixtures/tests_samples/temp_dir
22 | --overwrite_output_dir
23 | --data_dir ./tests/fixtures/tests_samples/GermEval
24 | --labels ./tests/fixtures/tests_samples/GermEval/labels.txt
25 | --max_seq_length 128
26 | --num_train_epochs 6
27 | --logging_steps 1
28 | --do_train
29 | --do_eval
30 | """.split()
31 | with patch.object(sys, "argv", ["run.py"] + testargs):
32 | result = run_ner.main()
33 | self.assertLess(result["eval_loss"], 1.5)
34 |
--------------------------------------------------------------------------------
/model_cards/deepset/bert-base-german-cased-oldvocab/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: german
3 | thumbnail: https://static.tildacdn.com/tild6438-3730-4164-b266-613634323466/german_bert.png
4 | tags:
5 | - exbert
6 | ---
7 |
8 |
9 |
10 |
11 |
12 | # German BERT with old vocabulary
13 | For details see the related [FARM issue](https://github.com/deepset-ai/FARM/issues/60).
14 |
15 |
16 | ## About us
17 | 
18 |
19 | We bring NLP to the industry via open source!
20 | Our focus: Industry specific language models & large scale QA systems.
21 |
22 | Some of our work:
23 | - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert)
24 | - [FARM](https://github.com/deepset-ai/FARM)
25 | - [Haystack](https://github.com/deepset-ai/haystack/)
26 |
27 | Get in touch:
28 | [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Website](https://deepset.ai)
29 |
--------------------------------------------------------------------------------
/docs/source/model_doc/encoderdecoder.rst:
--------------------------------------------------------------------------------
1 | Encoder Decoder Models
2 | ----------------------------------------------------
3 | 
4 | This class can wrap an encoder model, such as ``BertModel``, and a decoder model with a language modeling head, such as ``BertForMaskedLM``, into an encoder-decoder model.
5 | 
6 | The ``EncoderDecoderModel`` class allows you to instantiate an encoder-decoder model using the ``from_encoder_decoder_pretrained`` class method, which takes a pretrained encoder and a pretrained decoder model as input.
7 | The ``EncoderDecoderModel`` is saved using the standard ``save_pretrained()`` method and can also be loaded again using the standard ``from_pretrained()`` method.
8 | 
9 | An application of this architecture could be *summarization* using two pretrained BERT models, as shown in the paper `Text Summarization with Pretrained Encoders `_ by Yang Liu and Mirella Lapata.
10 |
11 |
12 | ``EncoderDecoderConfig``
13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
14 |
15 | .. autoclass:: transformers.EncoderDecoderConfig
16 | :members:
17 |
18 |
19 | ``EncoderDecoderModel``
20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
21 |
22 | .. autoclass:: transformers.EncoderDecoderModel
23 | :members:
24 |
--------------------------------------------------------------------------------
/examples/text-classification/run_pl.sh:
--------------------------------------------------------------------------------
1 | # Install newest ptl.
2 | pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/
3 | # Install example requirements
4 | pip install -r ../requirements.txt
5 |
6 | # Download glue data
7 | python3 ../../utils/download_glue_data.py
8 |
9 | export TASK=mrpc
10 | export DATA_DIR=./glue_data/MRPC/
11 | export MAX_LENGTH=128
12 | export LEARNING_RATE=2e-5
13 | export BERT_MODEL=bert-base-cased
14 | export BATCH_SIZE=32
15 | export NUM_EPOCHS=3
16 | export SEED=2
17 | export OUTPUT_DIR_NAME=mrpc-pl-bert
18 | export CURRENT_DIR=${PWD}
19 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME}
20 |
21 | # Make output directory if it doesn't exist
22 | mkdir -p $OUTPUT_DIR
23 | # Add parent directory to python path to access lightning_base.py
24 | export PYTHONPATH="../":"${PYTHONPATH}"
25 |
26 | python3 run_pl_glue.py --data_dir $DATA_DIR \
27 | --task $TASK \
28 | --model_name_or_path $BERT_MODEL \
29 | --output_dir $OUTPUT_DIR \
30 | --max_seq_length $MAX_LENGTH \
31 | --learning_rate $LEARNING_RATE \
32 | --num_train_epochs $NUM_EPOCHS \
33 | --train_batch_size $BATCH_SIZE \
34 | --seed $SEED \
35 | --do_train \
36 | --do_predict
37 |
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/STS-B/train.tsv:
--------------------------------------------------------------------------------
1 | index genre filename year old_index source1 source2 sentence1 sentence2 score
2 | 0 main-captions MSRvid 2012test 0001 none none A plane is taking off. An air plane is taking off. 5.000
3 | 1 main-captions MSRvid 2012test 0004 none none A man is playing a large flute. A man is playing a flute. 3.800
4 | 2 main-captions MSRvid 2012test 0005 none none A man is spreading shreded cheese on a pizza. A man is spreading shredded cheese on an uncooked pizza. 3.800
5 | 3 main-captions MSRvid 2012test 0006 none none Three men are playing chess. Two men are playing chess. 2.600
6 | 4 main-captions MSRvid 2012test 0009 none none A man is playing the cello. A man seated is playing the cello. 4.250
7 | 5 main-captions MSRvid 2012test 0011 none none Some men are fighting. Two men are fighting. 4.250
8 | 6 main-captions MSRvid 2012test 0012 none none A man is smoking. A man is skating. 0.500
9 | 7 main-captions MSRvid 2012test 0013 none none The man is playing the piano. The man is playing the guitar. 1.600
10 | 8 main-captions MSRvid 2012test 0014 none none A man is playing on a guitar and singing. A woman is playing an acoustic guitar and singing. 2.200
11 |
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/STS-B/dev.tsv:
--------------------------------------------------------------------------------
1 | index genre filename year old_index source1 source2 sentence1 sentence2 score
2 | 0 main-captions MSRvid 2012test 0000 none none A man with a hard hat is dancing. A man wearing a hard hat is dancing. 5.000
3 | 1 main-captions MSRvid 2012test 0002 none none A young child is riding a horse. A child is riding a horse. 4.750
4 | 2 main-captions MSRvid 2012test 0003 none none A man is feeding a mouse to a snake. The man is feeding a mouse to the snake. 5.000
5 | 3 main-captions MSRvid 2012test 0007 none none A woman is playing the guitar. A man is playing guitar. 2.400
6 | 4 main-captions MSRvid 2012test 0008 none none A woman is playing the flute. A man is playing a flute. 2.750
7 | 5 main-captions MSRvid 2012test 0010 none none A woman is cutting an onion. A man is cutting onions. 2.615
8 | 6 main-captions MSRvid 2012test 0015 none none A man is erasing a chalk board. The man is erasing the chalk board. 5.000
9 | 7 main-captions MSRvid 2012test 0023 none none A woman is carrying a boy. A woman is carrying her baby. 2.333
10 | 8 main-captions MSRvid 2012test 0027 none none Three men are playing guitars. Three men are on stage playing guitars. 3.750
11 |
--------------------------------------------------------------------------------
/model_cards/canwenxu/BERT-of-Theseus-MNLI/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | thumbnail: https://raw.githubusercontent.com/JetRunner/BERT-of-Theseus/master/bert-of-theseus.png
3 | ---
4 |
5 | # BERT-of-Theseus
6 | See our paper ["BERT-of-Theseus: Compressing BERT by Progressive Module Replacing"](http://arxiv.org/abs/2002.02925).
7 |
8 | BERT-of-Theseus is a new compressed BERT by progressively replacing the components of the original BERT.
9 |
10 | 
11 |
12 | ## Load Pretrained Model on MNLI
13 |
14 | We provide a 6-layer pretrained model on MNLI as a general-purpose model, which can transfer to other sentence classification tasks, outperforming DistilBERT (with the same 6-layer structure) on six tasks of GLUE (dev set).
15 |
16 | | Method | MNLI | MRPC | QNLI | QQP | RTE | SST-2 | STS-B |
17 | |-----------------|------|------|------|------|------|-------|-------|
18 | | BERT-base | 83.5 | 89.5 | 91.2 | 89.8 | 71.1 | 91.5 | 88.9 |
19 | | DistilBERT      | 79.0 | 87.5 | 85.3 | 84.9 | 59.9 | 90.7 | 81.2 |
20 | | BERT-of-Theseus | 82.1 | 87.5 | 88.8 | 88.8 | 70.1 | 91.8 | 87.8 |
21 |
--------------------------------------------------------------------------------
/model_cards/clue/roberta_chinese_base/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
5 | ## roberta_chinese_base
6 |
7 | ### Overview
8 |
9 | **Language model:** roberta-base
10 | **Model size:** 392M
11 | **Language:** Chinese
12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020)
13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE)
14 |
15 | ### Results
16 |
17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE).
18 |
19 | ### Usage
20 |
21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!!
22 |
23 | ```
24 | import torch
25 | from transformers import BertTokenizer, BertModel
26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_base")
27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_base")
28 | ```
29 |
30 | ### About CLUE benchmark
31 |
32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard.
33 |
34 | Github: https://github.com/CLUEbenchmark
35 | Website: https://www.cluebenchmarks.com/
36 |
--------------------------------------------------------------------------------
/model_cards/clue/roberta_chinese_large/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
5 | ## roberta_chinese_large
6 |
7 | ### Overview
8 |
9 | **Language model:** roberta-large
10 | **Model size:** 1.2G
11 | **Language:** Chinese
12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020)
13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE)
14 |
15 | ### Results
16 |
17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE).
18 |
19 | ### Usage
20 |
21 | **NOTE:** You have to call **BertTokenizer** instead of RobertaTokenizer !!!
22 |
23 | ```
24 | import torch
25 | from transformers import BertTokenizer, BertModel
26 | tokenizer = BertTokenizer.from_pretrained("clue/roberta_chinese_large")
27 | roberta = BertModel.from_pretrained("clue/roberta_chinese_large")
28 | ```
29 |
30 | ### About CLUE benchmark
31 |
32 | Organization of Language Understanding Evaluation benchmark for Chinese: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard.
33 |
34 | Github: https://github.com/CLUEbenchmark
35 | Website: https://www.cluebenchmarks.com/
36 |
--------------------------------------------------------------------------------
/model_cards/surajp/albert-base-sanskrit/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: sanskrit
3 | ---
4 |
5 |
6 | # ALBERT-base-Sanskrit
7 |
8 |
9 | Explanation Notebook (Colab): [SanskritALBERT.ipynb](https://colab.research.google.com/github/parmarsuraj99/suraj-parmar/blob/master/_notebooks/2020-05-02-SanskritALBERT.ipynb)
10 |
11 | Size of the model is **46MB**
12 |
13 | Example of usage:
14 |
15 | ```python
16 | import torch
17 | from transformers import AutoTokenizer, AutoModel
18 | tokenizer = AutoTokenizer.from_pretrained("surajp/albert-base-sanskrit")
19 | model = AutoModel.from_pretrained("surajp/albert-base-sanskrit")
20 | enc = tokenizer.encode("ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥")
21 | print(tokenizer.decode(enc))
22 | ps = model(torch.tensor(enc).unsqueeze(1))
23 | print(ps[0].shape)
24 | ```
25 | ```
26 | '''
27 | Output:
28 | --------
29 | [CLS] ॐ सर्वे भवन्तु सुखिनः सर्वे सन्तु निरामयाः । सर्वे भद्राणि पश्यन्तु मा कश्चिद्दुःखभाग्भवेत् । ॐ शान्तिः शान्तिः शान्तिः ॥[SEP]
30 | torch.Size([28, 1, 768])
31 | ```
32 |
33 |
34 | > Created by [Suraj Parmar/@parmarsuraj99](https://twitter.com/parmarsuraj99)
35 |
36 | > Made with ♥ in India
37 |
--------------------------------------------------------------------------------
/examples/benchmarking/run_benchmark.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The HuggingFace Inc. team.
3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """ Benchmarking the library on inference and training """
17 |
18 | from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments
19 |
20 |
21 | def main():
22 | parser = HfArgumentParser(PyTorchBenchmarkArguments)
23 | benchmark_args = parser.parse_args_into_dataclasses()[0]
24 | benchmark = PyTorchBenchmark(args=benchmark_args)
25 | benchmark.run()
26 |
27 |
28 | if __name__ == "__main__":
29 | main()
30 |
--------------------------------------------------------------------------------
/model_cards/wptoux/albert-chinese-large-qa/README.md:
--------------------------------------------------------------------------------
1 | # albert-chinese-large-qa
2 | ALBERT-large QA model trained on the Baidu WebQA and Baidu DuReader datasets.
3 |
4 | ## Data source
5 | + baidu webqa 1.0
6 | + baidu dureader
7 |
8 | ## Training Method
9 | We combined the two datasets and created a new dataset in SQuAD format, including 705139 samples for training and 69638 samples for validation.
10 | We fine-tuned the model based on the ALBERT Chinese large model.
11 |
12 | ## Hyperparams
13 | + learning_rate 1e-5
14 | + max_seq_length 512
15 | + max_query_length 50
16 | + max_answer_length 300
17 | + doc_stride 256
18 | + num_train_epochs 2
19 | + warmup_steps 1000
20 | + per_gpu_train_batch_size 8
21 | + gradient_accumulation_steps 3
22 | + n_gpu 2 (Nvidia Tesla P100)
23 |
24 | ## Usage
25 | ```python
26 | from transformers import AutoModelForQuestionAnswering, BertTokenizer
27 |
28 | model = AutoModelForQuestionAnswering.from_pretrained('wptoux/albert-chinese-large-qa')
29 | tokenizer = BertTokenizer.from_pretrained('wptoux/albert-chinese-large-qa')
30 | ```
31 | ***Important: use BertTokenizer***
32 |
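For illustration (not part of the original card), the loaded model and tokenizer can be used with the `question-answering` pipeline; the question/context pair below is made up:

```python
from transformers import pipeline

qa = pipeline("question-answering", model=model, tokenizer=tokenizer)
result = qa({"question": "北京是哪个国家的首都?", "context": "北京是中华人民共和国的首都,也是其政治和文化中心。"})
print(result["answer"])
```
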
33 | ## More Info
34 | Please visit https://github.com/wptoux/albert-chinese-large-webqa for details.
35 |
--------------------------------------------------------------------------------
/model_cards/clue/albert_chinese_tiny/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
5 | ## albert_chinese_tiny
6 |
7 | ### Overview
8 |
9 | **Language model:** albert-tiny
10 | **Model size:** 16M
11 | **Language:** Chinese
12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020)
13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE)
14 |
15 | ### Results
16 |
17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE).
18 |
19 | ### Usage
20 |
21 | **NOTE:** Since SentencePiece is not used in the `albert_chinese_tiny` model, you have to use **BertTokenizer** instead of **AlbertTokenizer**!
22 |
23 | ```python
24 | import torch
25 | from transformers import BertTokenizer, AlbertModel
26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_tiny")
27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_tiny")
28 | ```
29 |
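Continuing the snippet above (an illustrative sketch, not part of the original card), the loaded encoder can be used to embed a sentence:

```python
inputs = torch.tensor([tokenizer.encode("你好,世界!", add_special_tokens=True)])
last_hidden_state = albert(inputs)[0]  # shape: (1, sequence_length, hidden_size)
```
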
30 | ### About CLUE benchmark
31 |
32 | CLUE is the organization behind the Chinese Language Understanding Evaluation benchmark: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard.
33 |
34 | Github: https://github.com/CLUEbenchmark
35 | Website: https://www.cluebenchmarks.com/
36 |
--------------------------------------------------------------------------------
/model_cards/allenai/scibert_scivocab_cased/README.md:
--------------------------------------------------------------------------------
1 | # SciBERT
2 |
3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text.
4 |
5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts.
6 |
7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions.
8 |
9 | Available models include:
10 | * `scibert_scivocab_cased`
11 | * `scibert_scivocab_uncased`
12 |
13 |
14 | The original repo can be found [here](https://github.com/allenai/scibert).
15 |
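For reference (a minimal loading sketch, not part of the original card), the cased checkpoint can be used with the standard Auto classes:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_cased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_cased")
```
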
16 | If using these models, please cite the following paper:
17 | ```
18 | @inproceedings{beltagy-etal-2019-scibert,
19 | title = "SciBERT: A Pretrained Language Model for Scientific Text",
20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman",
21 | booktitle = "EMNLP",
22 | year = "2019",
23 | publisher = "Association for Computational Linguistics",
24 | url = "https://www.aclweb.org/anthology/D19-1371"
25 | }
26 | ```
27 |
--------------------------------------------------------------------------------
/model_cards/clue/albert_chinese_small/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: chinese
3 | ---
4 |
5 | ## albert_chinese_small
6 |
7 | ### Overview
8 |
9 | **Language model:** albert-small
10 | **Model size:** 18.5M
11 | **Language:** Chinese
12 | **Training data:** [CLUECorpusSmall](https://github.com/CLUEbenchmark/CLUECorpus2020)
13 | **Eval data:** [CLUE dataset](https://github.com/CLUEbenchmark/CLUE)
14 |
15 | ### Results
16 |
17 | For results on downstream tasks like text classification, please refer to [this repository](https://github.com/CLUEbenchmark/CLUE).
18 |
19 | ### Usage
20 |
21 | **NOTE:** Since SentencePiece is not used in the `albert_chinese_small` model, you have to use **BertTokenizer** instead of **AlbertTokenizer**!
22 |
23 | ```python
24 | import torch
25 | from transformers import BertTokenizer, AlbertModel
26 | tokenizer = BertTokenizer.from_pretrained("clue/albert_chinese_small")
27 | albert = AlbertModel.from_pretrained("clue/albert_chinese_small")
28 | ```
29 |
30 | ### About CLUE benchmark
31 |
32 | CLUE is the organization behind the Chinese Language Understanding Evaluation benchmark: tasks & datasets, baselines, pre-trained Chinese models, corpus and leaderboard.
33 |
34 | Github: https://github.com/CLUEbenchmark
35 | Website: https://www.cluebenchmarks.com/
36 |
--------------------------------------------------------------------------------
/model_cards/allenai/scibert_scivocab_uncased/README.md:
--------------------------------------------------------------------------------
1 | # SciBERT
2 |
3 | This is the pretrained model presented in [SciBERT: A Pretrained Language Model for Scientific Text](https://www.aclweb.org/anthology/D19-1371/), which is a BERT model trained on scientific text.
4 |
5 | The training corpus was papers taken from [Semantic Scholar](https://www.semanticscholar.org). Corpus size is 1.14M papers, 3.1B tokens. We use the full text of the papers in training, not just abstracts.
6 |
7 | SciBERT has its own wordpiece vocabulary (scivocab) that's built to best match the training corpus. We trained cased and uncased versions.
8 |
9 | Available models include:
10 | * `scibert_scivocab_cased`
11 | * `scibert_scivocab_uncased`
12 |
13 |
14 | The original repo can be found [here](https://github.com/allenai/scibert).
15 |
16 | If using these models, please cite the following paper:
17 | ```
18 | @inproceedings{beltagy-etal-2019-scibert,
19 | title = "SciBERT: A Pretrained Language Model for Scientific Text",
20 | author = "Beltagy, Iz and Lo, Kyle and Cohan, Arman",
21 | booktitle = "EMNLP",
22 | year = "2019",
23 | publisher = "Association for Computational Linguistics",
24 | url = "https://www.aclweb.org/anthology/D19-1371"
25 | }
26 | ```
27 |
--------------------------------------------------------------------------------
/model_cards/julien-c/EsperBERTo-small-pos/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: esperanto
3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png
4 | ---
5 |
6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto
7 |
8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥
9 |
10 | ## Training Details
11 |
12 | - current checkpoint: 566000
13 | - machine name: `galinette`
14 |
15 |
16 | 
17 |
18 | ## Example pipeline
19 |
20 | ```python
21 | from transformers import TokenClassificationPipeline, pipeline
22 |
23 |
24 | MODEL_PATH = "./models/EsperBERTo-small-pos/"
25 |
26 | nlp = pipeline(
27 | "ner",
28 | model=MODEL_PATH,
29 | tokenizer=MODEL_PATH,
30 | )
31 | # or instantiate a TokenClassificationPipeline directly.
32 |
33 | nlp("Mi estas viro kej estas tago varma.")
34 |
35 | # {'entity': 'PRON', 'score': 0.9979867339134216, 'word': ' Mi'}
36 | # {'entity': 'VERB', 'score': 0.9683094620704651, 'word': ' estas'}
37 | # {'entity': 'VERB', 'score': 0.9797462821006775, 'word': ' estas'}
38 | # {'entity': 'NOUN', 'score': 0.8509314060211182, 'word': ' tago'}
39 | # {'entity': 'ADJ', 'score': 0.9996201395988464, 'word': ' varma'}
40 | ```
--------------------------------------------------------------------------------
/model_cards/DeepPavlov/bert-base-cased-conversational/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - english
4 | ---
5 |
6 | # bert-base-cased-conversational
7 |
8 | Conversational BERT \(English, cased, 12‑layer, 768‑hidden, 12‑heads, 110M parameters\) was trained on the English part of Twitter, Reddit, DailyDialogues\[1\], OpenSubtitles\[2\], Debates\[3\], Blogs\[4\], and Facebook News Comments. We used this training data to build the vocabulary of English subtokens and took the English cased version of BERT‑base as the initialization for English Conversational BERT.
9 |
10 |
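For reference (a minimal loading sketch, not part of the original card), the model can be used with the standard Auto classes:

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/bert-base-cased-conversational")
model = AutoModel.from_pretrained("DeepPavlov/bert-base-cased-conversational")
```
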
11 | \[1\]: Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. IJCNLP 2017.
12 |
13 | \[2\]: P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation \(LREC 2016\)
14 |
15 | \[3\]: Justine Zhang, Ravi Kumar, Sujith Ravi, Cristian Danescu-Niculescu-Mizil. Proceedings of NAACL, 2016.
16 |
17 | \[4\]: J. Schler, M. Koppel, S. Argamon and J. Pennebaker \(2006\). Effects of Age and Gender on Blogging in Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs.
18 |
--------------------------------------------------------------------------------
/examples/summarization/t5/download_cnn_daily_mail.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 |
4 | import tensorflow_datasets as tfds
5 |
6 |
7 | def main(input_path, reference_path, data_dir):
8 | cnn_ds = tfds.load("cnn_dailymail", split="test", shuffle_files=False, data_dir=data_dir)
9 | cnn_ds_iter = tfds.as_numpy(cnn_ds)
10 |
11 | test_articles_file = Path(input_path).open("w")
12 | test_summaries_file = Path(reference_path).open("w")
13 |
14 | for example in cnn_ds_iter:
15 | test_articles_file.write(example["article"].decode("utf-8") + "\n")
16 | test_articles_file.flush()
17 | test_summaries_file.write(example["highlights"].decode("utf-8").replace("\n", " ") + "\n")
18 | test_summaries_file.flush()
19 |
20 |
21 | if __name__ == "__main__":
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument("input_path", type=str, help="where to save the articles input data")
24 | parser.add_argument(
25 | "reference_path", type=str, help="where to save the reference summaries",
26 | )
27 | parser.add_argument(
28 | "--data_dir", type=str, default="~/tensorflow_datasets", help="where to save the tensorflow datasets.",
29 | )
30 | args = parser.parse_args()
31 | main(args.input_path, args.reference_path, args.data_dir)
32 |
--------------------------------------------------------------------------------
/model_cards/illuin/camembert-base-fquad/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: french
3 | ---
4 |
5 | # camembert-base-fquad
6 |
7 | ## Description
8 |
9 | A native French Question Answering model, [CamemBERT-base](https://camembert-model.fr/), fine-tuned on [FQuAD](https://fquad.illuin.tech/).
10 |
11 | ## Evaluation results
12 |
13 | On the development set.
14 |
15 | ```shell
16 | {"f1": 88.1, "exact_match": 78.1}
17 | ```
18 |
19 | On the test set.
20 |
21 | ```shell
22 | {"f1": 88.3, "exact_match": 78.0}
23 | ```
24 |
25 | ## Usage
26 |
27 | ```python
28 | from transformers import pipeline
29 |
30 | nlp = pipeline('question-answering', model='illuin/camembert-base-fquad', tokenizer='illuin/camembert-base-fquad')
31 |
32 | nlp({
33 | 'question': "Qui est Claude Monet?",
34 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
35 | })
36 | ```
37 |
38 | ## Citation
39 |
40 | If you use our work, please cite:
41 |
42 | ```bibtex
43 | @article{dHoffschmidt2020FQuADFQ,
44 | title={FQuAD: French Question Answering Dataset},
45 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl{\'e} and Quentin Heinrich},
46 | journal={ArXiv},
47 | year={2020},
48 | volume={abs/2002.06071}
49 | }
50 | ```
51 |
--------------------------------------------------------------------------------
/model_cards/allenai/longformer-base-4096/README.md:
--------------------------------------------------------------------------------
1 |
2 | # longformer-base-4096
3 | [Longformer](https://arxiv.org/abs/2004.05150) is a transformer model for long documents.
4 |
5 | `longformer-base-4096` is a BERT-like model started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096.
6 |
7 | Longformer uses a combination of a sliding window (local) attention and global attention. Global attention is user-configured based on the task to allow the model to learn task-specific representations.
8 | Please refer to the examples in `modeling_longformer.py` and the paper for more details on how to set global attention.
9 |
10 |
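For illustration (a minimal sketch, not part of the original card), the checkpoint loads like any other model in the library; configuring global attention is task-specific and is covered in `modeling_longformer.py`:

```python
from transformers import LongformerModel, LongformerTokenizer

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = LongformerModel.from_pretrained("allenai/longformer-base-4096")

input_ids = tokenizer.encode("Long documents go here.", return_tensors="pt")
last_hidden_state = model(input_ids)[0]  # global attention not configured in this minimal example
```
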
11 | ### Citing
12 |
13 | If you use `Longformer` in your research, please cite [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150).
14 | ```
15 | @article{Beltagy2020Longformer,
16 | title={Longformer: The Long-Document Transformer},
17 | author={Iz Beltagy and Matthew E. Peters and Arman Cohan},
18 | journal={arXiv:2004.05150},
19 | year={2020},
20 | }
21 | ```
22 |
23 | `Longformer` is an open-source project developed by [the Allen Institute for Artificial Intelligence (AI2)](http://www.allenai.org).
24 | AI2 is a non-profit institute with the mission to contribute to humanity through high-impact AI research and engineering.
25 |
--------------------------------------------------------------------------------
/src/transformers/commands/transformers_cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from argparse import ArgumentParser
3 |
4 | from transformers.commands.convert import ConvertCommand
5 | from transformers.commands.download import DownloadCommand
6 | from transformers.commands.env import EnvironmentCommand
7 | from transformers.commands.run import RunCommand
8 | from transformers.commands.serving import ServeCommand
9 | from transformers.commands.user import UserCommands
10 |
11 |
12 | def main():
13 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli <command> [<args>]")
14 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers")
15 |
16 | # Register commands
17 | ConvertCommand.register_subcommand(commands_parser)
18 | DownloadCommand.register_subcommand(commands_parser)
19 | EnvironmentCommand.register_subcommand(commands_parser)
20 | RunCommand.register_subcommand(commands_parser)
21 | ServeCommand.register_subcommand(commands_parser)
22 | UserCommands.register_subcommand(commands_parser)
23 |
24 | # Let's go
25 | args = parser.parse_args()
26 |
27 | if not hasattr(args, "func"):
28 | parser.print_help()
29 | exit(1)
30 |
31 | # Run
32 | service = args.func(args)
33 | service.run()
34 |
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
--------------------------------------------------------------------------------
/model_cards/jplu/tf-camembert-base/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow CamemBERT
2 | 
3 | In this repository you will find different versions of the CamemBERT model for TensorFlow.
4 |
5 | ## CamemBERT
6 |
7 | [CamemBERT](https://camembert-model.fr/) is a state-of-the-art language model for French based on the RoBERTa architecture pretrained on the French subcorpus of the newly available multilingual corpus OSCAR.
8 |
9 | ## Model Weights
10 |
11 | | Model | Downloads
12 | | -------------------------------- | ---------------------------------------------------------------------------------------------------------------
13 | | `jplu/tf-camembert-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/tf_model.h5)
14 |
15 | ## Usage
16 |
17 | With Transformers >= 2.4, the TensorFlow models of CamemBERT can be loaded as follows:
18 |
19 | ```python
20 | from transformers import TFCamembertModel
21 |
22 | model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base")
23 | ```
24 |
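Continuing the snippet above (an illustrative sketch, not part of the original card; it assumes the tokenizer of the original `camembert-base` checkpoint can be reused):

```python
from transformers import CamembertTokenizer

tokenizer = CamembertTokenizer.from_pretrained("camembert-base")  # assumption: reuse the original CamemBERT tokenizer
input_ids = tokenizer.encode("J'aime le camembert !", return_tensors="tf")
last_hidden_state = model(input_ids)[0]  # shape: (1, sequence_length, 768)
```
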
25 | ## Huggingface model hub
26 |
27 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu).
28 |
29 | ## Acknowledgments
30 |
31 | Thanks to all the Huggingface team for the support and their amazing library!
32 |
--------------------------------------------------------------------------------
/model_cards/google/reformer-crime-and-punishment/README.md:
--------------------------------------------------------------------------------
1 | ## Reformer Model trained on "Crime and Punishment"
2 |
3 | Crime and Punishment is a novel written by Fyodor Dostoevsky and was translated into English.
4 |
5 | Crime and Punishment training data was taken from `gs://trax-ml/reformer/crime-and-punishment-2554.txt` and contains
6 | roughly 0.5M tokens.
7 |
8 | The ReformerLM model was trained in Flax using the Colab notebook provided by the authors (https://colab.research.google.com/github/google/trax/blob/master/trax/models/reformer/text_generation.ipynb), and the weights were converted to Hugging Face's PyTorch ReformerLM model `ReformerModelWithLMHead`.
9 |
10 | The model is a language model that operates on small sub-word units. Text can be generated as follows:
11 |
12 | ```python
13 | from transformers import ReformerModelWithLMHead, ReformerTokenizer
14 | model = ReformerModelWithLMHead.from_pretrained("patrickvonplaten/reformer-crime-and-punish")
15 | tok = ReformerTokenizer.from_pretrained("patrickvonplaten/reformer-crime-and-punish")
16 | tok.decode(model.generate(tok.encode("A few months later", return_tensors="pt"), do_sample=True, temperature=0.7, max_length=100)[0])
17 | # gives:'A few months later on was more than anything in the flat.
18 | # “I have already.” “That’s not my notion that he had forgotten him.
19 | # What does that matter? And why do you mean? It’s only another fellow,” he said as he went out, as though he want'
20 | ```
21 |
--------------------------------------------------------------------------------
/tests/test_adapter_fusion_config.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from dataclasses import FrozenInstanceError
3 |
4 | from transformers import ADAPTERFUSION_CONFIG_MAP, AdapterFusionConfig
5 |
6 | from .utils import require_torch
7 |
8 |
9 | @require_torch
10 | class AdapterFusionConfigTest(unittest.TestCase):
11 |
12 | config_names = ADAPTERFUSION_CONFIG_MAP.keys()
13 |
14 | def test_config_load(self):
15 | for config_name in self.config_names:
16 | with self.subTest(config_name=config_name):
17 | config = AdapterFusionConfig.load(config_name, temperature=True)
18 | self.assertTrue(isinstance(config, AdapterFusionConfig))
19 | self.assertEqual(config.temperature, True)
20 |
21 | def test_config_immutable(self):
22 | def set_attr(config: AdapterFusionConfig):
23 | config.temperature = True
24 |
25 | for config in ADAPTERFUSION_CONFIG_MAP.values():
26 | with self.subTest(config=config.__class__.__name__):
27 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config))
28 |
29 | def test_custom_attr(self):
30 | for config in ADAPTERFUSION_CONFIG_MAP.values():
31 | with self.subTest(config=config.__class__.__name__):
32 | config.dummy_attr = "test_value"
33 | self.assertEqual(config.dummy_attr, "test_value")
34 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-imdb-pos/README.md:
--------------------------------------------------------------------------------
1 | # GPT2-IMDB-pos
2 |
3 | ## What is it?
4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce positive movie reviews based on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/gpt2-imdb`) via PPO.
5 |
6 | ## Training setting
7 | The model was trained for `100` optimisation steps with a batch size of `256` which corresponds to `25600` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/04-gpt2-sentiment-ppo-training/).
8 |
9 | ## Examples
10 | A few examples of the model response to a query before and after optimisation:
11 |
12 | | query | response (before) | response (after) | rewards (before) | rewards (after) |
13 | |-------|-------------------|------------------|------------------|-----------------|
14 | |I'd never seen a |heavier, woodier example of Victorian archite... |film of this caliber, and I think it's wonder... |3.297736 |4.158653|
15 | |I love John's work |but I actually have to write language as in w... |and I hereby recommend this film. I am really... |-1.904006 |4.159198 |
16 | |I's a big struggle |to see anyone who acts in that way. by Jim Th... |, but overall I'm happy with the changes even ... |-1.595925 |2.651260|
17 |
18 |
19 |
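For illustration (a minimal generation sketch, not part of the original card; it assumes the checkpoint ships with GPT-2 tokenizer files, otherwise the standard `gpt2` tokenizer can be used):

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-imdb-pos")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-imdb-pos")

input_ids = tokenizer.encode("I'd never seen a", return_tensors="pt")
output = model.generate(input_ids, max_length=40, do_sample=True, top_k=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
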
--------------------------------------------------------------------------------
/model_cards/twmkn9/albert-base-v2-squad2/README.md:
--------------------------------------------------------------------------------
1 | This model is [ALBERT base v2](https://huggingface.co/albert-base-v2) trained on SQuAD v2 as:
2 |
3 | ```bash
4 | export SQUAD_DIR=../../squad2
5 | python3 run_squad.py \
6 |     --model_type albert \
7 |     --model_name_or_path albert-base-v2 \
8 |     --do_train \
9 |     --do_eval \
10 |     --overwrite_cache \
11 |     --do_lower_case \
12 |     --version_2_with_negative \
13 |     --save_steps 100000 \
14 |     --train_file $SQUAD_DIR/train-v2.0.json \
15 |     --predict_file $SQUAD_DIR/dev-v2.0.json \
16 |     --per_gpu_train_batch_size 8 \
17 |     --num_train_epochs 3 \
18 |     --learning_rate 3e-5 \
19 |     --max_seq_length 384 \
20 |     --doc_stride 128 \
21 |     --output_dir ./tmp/albert_fine/
22 | ```
23 |
24 | Performance on a dev subset is close to the original paper:
25 |
26 | ```
27 | Results:
28 | {
29 | 'exact': 78.71010200723923,
30 | 'f1': 81.89228117126069,
31 | 'total': 6078,
32 | 'HasAns_exact': 75.39518900343643,
33 | 'HasAns_f1': 82.04167868004215,
34 | 'HasAns_total': 2910,
35 | 'NoAns_exact': 81.7550505050505,
36 | 'NoAns_f1': 81.7550505050505,
37 | 'NoAns_total': 3168,
38 | 'best_exact': 78.72655478775913,
39 | 'best_exact_thresh': 0.0,
40 | 'best_f1': 81.90873395178066,
41 | 'best_f1_thresh': 0.0
42 | }
43 | ```
44 |
45 | We are hopeful this might save you time, energy, and compute. Cheers!
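
For illustration (a minimal sketch, not part of the original card), the fine-tuned checkpoint can be used with the `question-answering` pipeline; the question/context pair below is made up:

```python
from transformers import pipeline

qa = pipeline(
    "question-answering",
    model="twmkn9/albert-base-v2-squad2",
    tokenizer="twmkn9/albert-base-v2-squad2",
)
print(qa(question="What was the model fine-tuned on?", context="This ALBERT base v2 model was fine-tuned on SQuAD v2."))
```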
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/MRPC/dev.tsv:
--------------------------------------------------------------------------------
1 | Quality #1 ID #2 ID #1 String #2 String
2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy .
3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
8 |
--------------------------------------------------------------------------------
/tests/fixtures/tests_samples/MRPC/train.tsv:
--------------------------------------------------------------------------------
1 | Quality #1 ID #2 ID #1 String #2 String
2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy .
3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
8 |
--------------------------------------------------------------------------------
/examples/summarization/t5/README.md:
--------------------------------------------------------------------------------
1 | ***This script evaluates the multitask pre-trained checkpoint for ``t5-base`` (see the paper [here](https://arxiv.org/pdf/1910.10683.pdf)) on the CNN/Daily Mail test dataset. Please note that the results in the paper were attained using a model fine-tuned on summarization, so results here will be worse by approximately 0.5 ROUGE points.***
2 |
3 | ### Get the CNN Data
4 | First, you need to download the CNN data. It's about 400 MB and can be downloaded by
5 | running
6 |
7 | ```bash
8 | python download_cnn_daily_mail.py cnn_articles_input_data.txt cnn_articles_reference_summaries.txt
9 | ```
10 |
11 | You should confirm that each file has 11490 lines:
12 |
13 | ```bash
14 | wc -l cnn_articles_input_data.txt # should print 11490
15 | wc -l cnn_articles_reference_summaries.txt # should print 11490
16 | ```
17 |
18 | ### Generating Summaries
19 |
20 | To create summaries for each article in the dataset, run:
21 | ```bash
22 | python evaluate_cnn.py cnn_articles_input_data.txt cnn_generated_articles_summaries.txt cnn_articles_reference_summaries.txt rouge_score.txt
23 | ```
24 | The default batch size, 8, fits in 16GB GPU memory, but may need to be adjusted to fit your system.
25 | The rouge scores "rouge1, rouge2, rougeL" are automatically created and saved in ``rouge_score.txt``.
26 |
27 |
28 | ### Finetuning
29 | To fine-tune, pass `model_type=t5` and your model name or path to `examples/summarization/bart/finetune.py`.
30 |
--------------------------------------------------------------------------------
/model_cards/digitalepidemiologylab/covid-twitter-bert/README.md:
--------------------------------------------------------------------------------
1 | # COVID-Twitter-BERT (CT-BERT)
2 | BERT-large-uncased model, pretrained on a corpus of messages from Twitter about COVID-19
3 |
4 | ## Overview
5 | This model was trained on 160M tweets collected between January 12 and April 16, 2020 containing at least one of the keywords "wuhan", "ncov", "coronavirus", "covid", or "sars-cov-2". These tweets were filtered and preprocessed to reach a final sample of 22.5M tweets (containing 40.7M sentences and 633M tokens) which were used for training.
6 |
7 | This model was evaluated based on downstream classification tasks, but it could be used for any other NLP task which can leverage contextual embeddings.
8 |
9 | In order to achieve the best results, make sure to use the same text preprocessing as we did for pretraining. This involves replacing user mentions, URLs and emojis. You can find a script on our project's [GitHub repo](https://github.com/digitalepidemiologylab/covid-twitter-bert).
10 |
11 | ## Example usage
12 | ```python
13 | from transformers import AutoTokenizer, TFAutoModel
14 | tokenizer = AutoTokenizer.from_pretrained("digitalepidemiologylab/covid-twitter-bert")
15 | model = TFAutoModel.from_pretrained("digitalepidemiologylab/covid-twitter-bert")
16 | ```
17 | ## References
18 | [1] Martin Müller, Marcel Salathé, Per E Kummervold. "COVID-Twitter-BERT: A Natural Language Processing Model to Analyse COVID-19 Content on Twitter" arXiv preprint arXiv:2005.07503 (2020).
19 |
--------------------------------------------------------------------------------
/model_cards/twmkn9/bert-base-uncased-squad2/README.md:
--------------------------------------------------------------------------------
1 | This model is [BERT base uncased](https://huggingface.co/bert-base-uncased) trained on SQuAD v2 as:
2 |
3 | ```bash
4 | export SQUAD_DIR=../../squad2
5 | python3 run_squad.py \
6 |     --model_type bert \
7 |     --model_name_or_path bert-base-uncased \
8 |     --do_train \
9 |     --do_eval \
10 |     --overwrite_cache \
11 |     --do_lower_case \
12 |     --version_2_with_negative \
13 |     --save_steps 100000 \
14 |     --train_file $SQUAD_DIR/train-v2.0.json \
15 |     --predict_file $SQUAD_DIR/dev-v2.0.json \
16 |     --per_gpu_train_batch_size 8 \
17 |     --num_train_epochs 3 \
18 |     --learning_rate 3e-5 \
19 |     --max_seq_length 384 \
20 |     --doc_stride 128 \
21 |     --output_dir ./tmp/bert_fine_tuned/
22 | ```
23 |
24 | Performance on a dev subset is close to the original paper:
25 |
26 | ```
27 | Results:
28 | {
29 | 'exact': 72.35932872655479,
30 | 'f1': 75.75355132564763,
31 | 'total': 6078,
32 | 'HasAns_exact': 74.29553264604812,
33 | 'HasAns_f1': 81.38490892002987,
34 | 'HasAns_total': 2910,
35 | 'NoAns_exact': 70.58080808080808,
36 | 'NoAns_f1': 70.58080808080808,
37 | 'NoAns_total': 3168,
38 | 'best_exact': 72.35932872655479,
39 | 'best_exact_thresh': 0.0,
40 | 'best_f1': 75.75355132564766,
41 | 'best_f1_thresh': 0.0
42 | }
43 | ```
44 |
45 | We are hopeful this might save you time, energy, and compute. Cheers!
--------------------------------------------------------------------------------
/src/transformers/commands/download.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | from transformers.commands import BaseTransformersCLICommand
4 |
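# Illustrative example invocation (not part of the original file), via the transformers-cli
# entry point that registers this command:
#   transformers-cli download bert-base-uncased --cache-dir /tmp/models --force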
5 |
6 | def download_command_factory(args):
7 | return DownloadCommand(args.model, args.cache_dir, args.force)
8 |
9 |
10 | class DownloadCommand(BaseTransformersCLICommand):
11 | @staticmethod
12 | def register_subcommand(parser: ArgumentParser):
13 | download_parser = parser.add_parser("download")
14 | download_parser.add_argument(
15 | "--cache-dir", type=str, default=None, help="Path to location to store the models"
16 | )
17 | download_parser.add_argument(
18 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir"
19 | )
20 | download_parser.add_argument("model", type=str, help="Name of the model to download")
21 | download_parser.set_defaults(func=download_command_factory)
22 |
23 | def __init__(self, model: str, cache: str, force: bool):
24 | self._model = model
25 | self._cache = cache
26 | self._force = force
27 |
28 | def run(self):
29 | from transformers import AutoModel, AutoTokenizer
30 |
31 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
32 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
33 |
--------------------------------------------------------------------------------
/model_cards/twmkn9/distilroberta-base-squad2/README.md:
--------------------------------------------------------------------------------
1 | This model is [Distilroberta base](https://huggingface.co/distilroberta-base) trained on SQuAD v2 as:
2 |
3 | ```bash
4 | export SQUAD_DIR=../../squad2
5 | python3 run_squad.py \
6 |     --model_type roberta \
7 |     --model_name_or_path distilroberta-base \
8 |     --do_train \
9 |     --do_eval \
10 |     --overwrite_cache \
11 |     --do_lower_case \
12 |     --version_2_with_negative \
13 |     --save_steps 100000 \
14 |     --train_file $SQUAD_DIR/train-v2.0.json \
15 |     --predict_file $SQUAD_DIR/dev-v2.0.json \
16 |     --per_gpu_train_batch_size 8 \
17 |     --num_train_epochs 3 \
18 |     --learning_rate 3e-5 \
19 |     --max_seq_length 384 \
20 |     --doc_stride 128 \
21 |     --output_dir ./tmp/distilroberta_fine_tuned/
22 | ```
23 |
24 | Performance on a dev subset is close to the original paper:
25 |
26 | ```
27 | Results:
28 | {
29 | 'exact': 70.9279368213228,
30 | 'f1': 74.60439802429168,
31 | 'total': 6078,
32 | 'HasAns_exact': 67.62886597938144,
33 | 'HasAns_f1': 75.30774267754136,
34 | 'HasAns_total': 2910,
35 | 'NoAns_exact': 73.95833333333333,
36 | 'NoAns_f1': 73.95833333333333, 'NoAns_total': 3168,
37 | 'best_exact': 70.94438960184272,
38 | 'best_exact_thresh': 0.0,
39 | 'best_f1': 74.62085080481161,
40 | 'best_f1_thresh': 0.0
41 | }
42 | ```
43 |
44 | We are hopeful this might save you time, energy, and compute. Cheers!
--------------------------------------------------------------------------------
/model_cards/fmikaelian/camembert-base-fquad/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: french
3 | ---
4 |
5 | # camembert-base-fquad
6 |
7 | ## Description
8 |
9 | A baseline model for question-answering in French ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [FQuAD](https://fquad.illuin.tech/))
10 |
11 | ## Training hyperparameters
12 |
13 | ```shell
14 | python3 ./examples/question-answering/run_squad.py \
15 | --model_type camembert \
16 | --model_name_or_path camembert-base \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file train.json \
21 | --predict_file valid.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3 \
29 | --save_steps 10000
30 | ```
31 |
32 | ## Evaluation results
33 |
34 | ```shell
35 | {"f1": 77.24515316052342, "exact_match": 52.82308657465496}
36 | ```
37 |
38 | ## Usage
39 |
40 | ```python
41 | from transformers import pipeline
42 |
43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-fquad', tokenizer='fmikaelian/camembert-base-fquad')
44 |
45 | nlp({
46 | 'question': "Qui est Claude Monet?",
47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
48 | })
49 | ```
--------------------------------------------------------------------------------
/model_cards/julien-c/dummy-unknown/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | tags:
3 | - ci
4 | ---
5 |
6 | ## Dummy model used for unit testing and CI
7 |
8 |
9 | ```python
10 | import json
11 | import os
12 | from transformers.configuration_roberta import RobertaConfig
13 | from transformers import RobertaForMaskedLM, TFRobertaForMaskedLM
14 |
15 | DIRNAME = "./dummy-unknown"
16 |
17 |
18 | config = RobertaConfig(10, 20, 1, 1, 40)
19 |
20 | model = RobertaForMaskedLM(config)
21 | model.save_pretrained(DIRNAME)
22 |
23 | tf_model = TFRobertaForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
24 | tf_model.save_pretrained(DIRNAME)
25 |
26 | # Tokenizer:
27 |
28 | vocab = [
29 | "l",
30 | "o",
31 | "w",
32 | "e",
33 | "r",
34 | "s",
35 | "t",
36 | "i",
37 | "d",
38 | "n",
39 | "\u0120",
40 | "\u0120l",
41 | "\u0120n",
42 | "\u0120lo",
43 | "\u0120low",
44 | "er",
45 | "\u0120lowest",
46 | "\u0120newer",
47 | "\u0120wider",
48 | "",
49 | ]
50 | vocab_tokens = dict(zip(vocab, range(len(vocab))))
51 | merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]
52 |
53 | vocab_file = os.path.join(DIRNAME, "vocab.json")
54 | merges_file = os.path.join(DIRNAME, "merges.txt")
55 | with open(vocab_file, "w", encoding="utf-8") as fp:
56 | fp.write(json.dumps(vocab_tokens) + "\n")
57 | with open(merges_file, "w", encoding="utf-8") as fp:
58 | fp.write("\n".join(merges))
59 | ```
60 |
--------------------------------------------------------------------------------
/model_cards/twmkn9/distilbert-base-uncased-squad2/README.md:
--------------------------------------------------------------------------------
1 | This model is [Distilbert base uncased](https://huggingface.co/distilbert-base-uncased) trained on SQuAD v2 as:
2 |
3 | ```bash
4 | export SQUAD_DIR=../../squad2
5 | python3 run_squad.py \
6 |     --model_type distilbert \
7 |     --model_name_or_path distilbert-base-uncased \
8 |     --do_train \
9 |     --do_eval \
10 |     --overwrite_cache \
11 |     --do_lower_case \
12 |     --version_2_with_negative \
13 |     --save_steps 100000 \
14 |     --train_file $SQUAD_DIR/train-v2.0.json \
15 |     --predict_file $SQUAD_DIR/dev-v2.0.json \
16 |     --per_gpu_train_batch_size 8 \
17 |     --num_train_epochs 3 \
18 |     --learning_rate 3e-5 \
19 |     --max_seq_length 384 \
20 |     --doc_stride 128 \
21 |     --output_dir ./tmp/distilbert_fine_tuned/
22 | ```
23 |
24 | Performance on a dev subset is close to the original paper:
25 |
26 | ```
27 | Results:
28 | {
29 | 'exact': 64.88976637051661,
30 | 'f1': 68.1776176526635,
31 | 'total': 6078,
32 | 'HasAns_exact': 69.7594501718213,
33 | 'HasAns_f1': 76.62665295288285,
34 | 'HasAns_total': 2910,
35 | 'NoAns_exact': 60.416666666666664,
36 | 'NoAns_f1': 60.416666666666664,
37 | 'NoAns_total': 3168,
38 | 'best_exact': 64.88976637051661,
39 | 'best_exact_thresh': 0.0,
40 | 'best_f1': 68.17761765266337,
41 | 'best_f1_thresh': 0.0
42 | }
43 | ```
44 |
45 | We are hopeful this might save you time, energy, and compute. Cheers!
--------------------------------------------------------------------------------
/docs/source/bertology.rst:
--------------------------------------------------------------------------------
1 | BERTology
2 | ---------
3 |
4 | There is a growing field of study concerned with investigating the inner workings of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are:
5 |
6 |
7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950
8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650
9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341
10 |
11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650):
12 |
13 |
14 | * accessing all the hidden-states of BERT/GPT/GPT-2,
15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2,
16 | * retrieving heads output values and gradients to be able to compute head importance score and prune head as explained in https://arxiv.org/abs/1905.10650.
17 |
18 | To help you understand and use these features, we have added a specific example script, ``bertology.py``, which extracts information from and prunes a model pre-trained on GLUE.
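
As an illustration (a minimal sketch, not part of the original documentation), the configuration flags and the ``prune_heads`` hook can be used as follows:

.. code-block:: python

    from transformers import BertModel, BertTokenizer

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertModel.from_pretrained(
        "bert-base-uncased", output_hidden_states=True, output_attentions=True
    )

    input_ids = tokenizer.encode("BERTology studies the inner workings of BERT.", return_tensors="pt")
    sequence_output, pooled_output, hidden_states, attentions = model(input_ids)

    # hidden_states: tuple of (num_layers + 1) tensors of shape (batch, seq_len, hidden_size)
    # attentions: tuple of num_layers tensors of shape (batch, num_heads, seq_len, seq_len)

    # Prune heads 0 and 1 of layer 0, and head 2 of layer 2
    model.prune_heads({0: [0, 1], 2: [2]})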
19 |
--------------------------------------------------------------------------------
/model_cards/activebus/BERT-DK_rest/README.md:
--------------------------------------------------------------------------------
1 | # ReviewBERT
2 |
3 | BERT (post-)trained from review corpus to understand sentiment, opinions and various e-commerce aspects.
4 |
5 | `BERT-DK_rest` is trained on a 1 GB corpus of reviews from 19 types of restaurants on Yelp.
6 |
7 | ## Model Description
8 |
9 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus.
10 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/).
11 |
12 |
13 | ## Instructions
14 | Loading the post-trained weights is as simple as:
15 |
16 | ```python
17 | import torch
18 | from transformers import AutoModel, AutoTokenizer
19 |
20 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_rest")
21 | model = AutoModel.from_pretrained("activebus/BERT-DK_rest")
22 |
23 | ```
24 |
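Continuing the snippet above (an illustrative sketch, not part of the original card; the review text is made up), the encoder can then embed a review sentence:

```python
input_ids = tokenizer.encode("The tacos were great but the service was slow.", return_tensors="pt")
with torch.no_grad():
    last_hidden_state = model(input_ids)[0]  # shape: (1, sequence_length, hidden_size)
```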
25 |
26 | ## Evaluation Results
27 |
28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf)
29 |
30 |
31 | ## Citation
32 | If you find this work useful, please cite as follows.
33 | ```
34 | @inproceedings{xu_bert2019,
35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
38 | month = "jun",
39 | year = "2019",
40 | }
41 | ```
42 |
--------------------------------------------------------------------------------
/examples/token-classification/run.sh:
--------------------------------------------------------------------------------
1 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \
2 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp
3 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \
4 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp
5 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \
6 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp
7 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py"
8 | export MAX_LENGTH=128
9 | export BERT_MODEL=bert-base-multilingual-cased
10 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt
11 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt
12 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt
13 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt
14 | export OUTPUT_DIR=germeval-model
15 | export BATCH_SIZE=32
16 | export NUM_EPOCHS=3
17 | export SAVE_STEPS=750
18 | export SEED=1
19 |
20 | python3 run_ner.py \
21 | --data_dir . \
22 | --labels ./labels.txt \
23 | --model_name_or_path $BERT_MODEL \
24 | --output_dir $OUTPUT_DIR \
25 | --max_seq_length $MAX_LENGTH \
26 | --num_train_epochs $NUM_EPOCHS \
27 | --per_gpu_train_batch_size $BATCH_SIZE \
28 | --save_steps $SAVE_STEPS \
29 | --seed $SEED \
30 | --do_train \
31 | --do_eval \
32 | --do_predict
33 |
--------------------------------------------------------------------------------
/model_cards/activebus/BERT-PT_rest/README.md:
--------------------------------------------------------------------------------
1 | # ReviewBERT
2 |
3 | BERT (post-)trained from review corpus to understand sentiment, opinions and various e-commerce aspects.
4 |
5 | `BERT-DK_rest` is trained on a 1 GB corpus of reviews from 19 types of restaurants on Yelp.
6 | `BERT-PT_*` additionally uses SQuAD 1.1.
7 |
8 | ## Model Description
9 |
10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus.
11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/).
12 |
13 |
14 | ## Instructions
15 | Loading the post-trained weights is as simple as:
16 |
17 | ```python
18 | import torch
19 | from transformers import AutoModel, AutoTokenizer
20 |
21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_rest")
22 | model = AutoModel.from_pretrained("activebus/BERT-PT_rest")
23 |
24 | ```
25 |
26 |
27 | ## Evaluation Results
28 |
29 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf)
30 |
31 |
32 | ## Citation
33 | If you find this work useful, please cite as follows.
34 | ```
35 | @inproceedings{xu_bert2019,
36 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
37 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
38 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
39 | month = "jun",
40 | year = "2019",
41 | }
42 | ```
43 |
--------------------------------------------------------------------------------
/examples/summarization/t5/test_t5_examples.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 | import tempfile
4 | import unittest
5 | from pathlib import Path
6 | from unittest.mock import patch
7 |
8 | from .evaluate_cnn import run_generate
9 |
10 |
11 | output_file_name = "output_t5_sum.txt"
12 | score_file_name = "score_t5_sum.txt"
13 |
14 | articles = ["New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County."]
15 |
16 | logging.basicConfig(level=logging.DEBUG)
17 |
18 | logger = logging.getLogger()
19 |
20 |
21 | class TestT5Examples(unittest.TestCase):
22 | def test_t5_cli(self):
23 | stream_handler = logging.StreamHandler(sys.stdout)
24 | logger.addHandler(stream_handler)
25 | tmp = Path(tempfile.gettempdir()) / "utest_generations_t5_sum.hypo"
26 | with tmp.open("w") as f:
27 | f.write("\n".join(articles))
28 |
29 | output_file_name = Path(tempfile.gettempdir()) / "utest_output_t5_sum.hypo"
30 | score_file_name = Path(tempfile.gettempdir()) / "utest_score_t5_sum.hypo"
31 |
32 | testargs = [
33 | "evaluate_cnn.py",
34 | "patrickvonplaten/t5-tiny-random",
35 | str(tmp),
36 | str(output_file_name),
37 | str(tmp),
38 | str(score_file_name),
39 | ]
40 |
41 | with patch.object(sys, "argv", testargs):
42 | run_generate()
43 | self.assertTrue(Path(output_file_name).exists())
44 | self.assertTrue(Path(score_file_name).exists())
45 |
--------------------------------------------------------------------------------
/tests/test_adapter_config.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from dataclasses import FrozenInstanceError
3 |
4 | from transformers import ADAPTER_CONFIG_MAP, AdapterConfig
5 |
6 | from .utils import require_torch
7 |
8 |
9 | @require_torch
10 | class AdapterConfigTest(unittest.TestCase):
11 |
12 | config_names = ["pfeiffer", "houlsby"]
13 |
14 | def test_config_load(self):
15 | download_kwargs = {"force_download": True}
16 | for config_name in self.config_names:
17 | with self.subTest(config_name=config_name):
18 | config = AdapterConfig.load(config_name, download_kwargs=download_kwargs, non_linearity="leakyrelu")
19 | self.assertTrue(isinstance(config, AdapterConfig))
20 | self.assertEqual(config.non_linearity, "leakyrelu")
21 |
22 | def test_config_immutable(self):
23 | def set_attr(config: AdapterConfig):
24 | config.ln_before = True
25 |
26 | for config in ADAPTER_CONFIG_MAP.values():
27 | with self.subTest(config=config.__class__.__name__):
28 | self.assertRaises(FrozenInstanceError, lambda: set_attr(config))
29 |
30 | def test_custom_attr(self):
31 | for config in ADAPTER_CONFIG_MAP.values():
32 | with self.subTest(config=config.__class__.__name__):
33 | # create a copy to leave original untouched
34 | config = config.replace()
35 | config.dummy_attr = "test_value"
36 | self.assertEqual(config.dummy_attr, "test_value")
37 |
--------------------------------------------------------------------------------
/model_cards/activebus/BERT-PT_laptop/README.md:
--------------------------------------------------------------------------------
1 | # ReviewBERT
2 |
3 | BERT (post-)trained from review corpus to understand sentiment, opinions and various e-commerce aspects.
4 |
5 | `BERT-DK_laptop` is trained on a 100 MB laptop corpus under `Electronics/Computers & Accessories/Laptops`.
6 | `BERT-PT_*` additionally uses SQuAD 1.1.
7 |
8 | ## Model Description
9 |
10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus.
11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/).
12 |
13 |
14 | ## Instructions
15 | Loading the post-trained weights is as simple as:
16 |
17 | ```python
18 | import torch
19 | from transformers import AutoModel, AutoTokenizer
20 |
21 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-PT_laptop")
22 | model = AutoModel.from_pretrained("activebus/BERT-PT_laptop")
23 |
24 | ```
25 |
26 | ## Evaluation Results
27 |
28 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf)
29 |
30 |
31 | ## Citation
32 | If you find this work useful, please cite as follows.
33 | ```
34 | @inproceedings{xu_bert2019,
35 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
36 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
37 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
38 | month = "jun",
39 | year = "2019",
40 | }
41 | ```
42 |
--------------------------------------------------------------------------------
/model_cards/fmikaelian/camembert-base-squad/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: french
3 | ---
4 |
5 | # camembert-base-squad
6 |
7 | ## Description
8 |
9 | A baseline model for question-answering in French ([CamemBERT](https://camembert-model.fr/) model fine-tuned on the [French-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD))
10 |
11 | ## Training hyperparameters
12 |
13 | ```shell
14 | python3 ./examples/question-answering/run_squad.py \
15 | --model_type camembert \
16 | --model_name_or_path camembert-base \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \
21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output3 \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3 \
29 | --save_steps 10000
30 | ```
31 |
32 | ## Evaluation results
33 |
34 | ```shell
35 | {"f1": 79.8570684959745, "exact_match": 59.21327108373895}
36 | ```
37 |
38 | ## Usage
39 |
40 | ```python
41 | from transformers import pipeline
42 |
43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-squad', tokenizer='fmikaelian/camembert-base-squad')
44 |
45 | nlp({
46 | 'question': "Qui est Claude Monet?",
47 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
48 | })
49 | ```
--------------------------------------------------------------------------------
/model_cards/monologg/koelectra-base-generator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: Korean
3 | ---
4 |
5 | # KoELECTRA (Base Generator)
6 |
7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-generator`)
8 |
9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md).
10 |
11 | ## Usage
12 |
13 | ### Load model and tokenizer
14 |
15 | ```python
16 | >>> from transformers import ElectraModel, ElectraTokenizer
17 |
18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-generator")
19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator")
20 | ```
21 |
22 | ### Tokenizer example
23 |
24 | ```python
25 | >>> from transformers import ElectraTokenizer
26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-generator")
27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']
29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'])
30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3]
31 | ```
32 |
33 | ## Example using ElectraForMaskedLM
34 |
35 | ```python
36 | from transformers import pipeline
37 |
38 | fill_mask = pipeline(
39 | "fill-mask",
40 | model="monologg/koelectra-base-generator",
41 | tokenizer="monologg/koelectra-base-generator"
42 | )
43 |
44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token)))
45 | ```
46 |
--------------------------------------------------------------------------------
/model_cards/julien-c/EsperBERTo-small/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: esperanto
3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png
4 | ---
5 |
6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto
7 |
8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥
9 |
10 | ## Training Details
11 |
12 | - current checkpoint: 566000
13 | - machine name: `galinette`
14 |
15 |
16 | 
17 |
18 | ## Example pipeline
19 |
20 | ```python
21 | from transformers import pipeline
22 |
23 | fill_mask = pipeline(
24 | "fill-mask",
25 | model="julien-c/EsperBERTo-small",
26 | tokenizer="julien-c/EsperBERTo-small"
27 | )
28 |
29 | fill_mask("Jen la komenco de bela .")
30 |
31 | # This is the beginning of a beautiful .
32 | # =>
33 |
34 | # {
35 | # 'score':0.06502299010753632
36 | # 'sequence':' Jen la komenco de bela vivo.'
37 | # 'token':1099
38 | # }
39 | # {
40 | # 'score':0.0421181358397007
41 | # 'sequence':' Jen la komenco de bela vespero.'
42 | # 'token':5100
43 | # }
44 | # {
45 | # 'score':0.024884626269340515
46 | # 'sequence':' Jen la komenco de bela laboro.'
47 | # 'token':1570
48 | # }
49 | # {
50 | # 'score':0.02324388362467289
51 | # 'sequence':' Jen la komenco de bela tago.'
52 | # 'token':1688
53 | # }
54 | # {
55 | # 'score':0.020378097891807556
56 | # 'sequence':' Jen la komenco de bela festo.'
57 | # 'token':4580
58 | # }
59 | ```
60 |
--------------------------------------------------------------------------------
/model_cards/monologg/koelectra-small-generator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: Korean
3 | ---
4 |
5 | # KoELECTRA (Small Generator)
6 |
7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-generator`)
8 |
9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md).
10 |
11 | ## Usage
12 |
13 | ### Load model and tokenizer
14 |
15 | ```python
16 | >>> from transformers import ElectraModel, ElectraTokenizer
17 |
18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-generator")
19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator")
20 | ```
21 |
22 | ### Tokenizer example
23 |
24 | ```python
25 | >>> from transformers import ElectraTokenizer
26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-generator")
27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']
29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'])
30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3]
31 | ```
32 |
33 | ## Example using ElectraForMaskedLM
34 |
35 | ```python
36 | from transformers import pipeline
37 |
38 | fill_mask = pipeline(
39 | "fill-mask",
40 | model="monologg/koelectra-small-generator",
41 | tokenizer="monologg/koelectra-small-generator"
42 | )
43 |
44 | print(fill_mask("나는 {} 밥을 먹었다.".format(fill_mask.tokenizer.mask_token)))
45 | ```
46 |
--------------------------------------------------------------------------------
/model_cards/fmikaelian/flaubert-base-uncased-squad/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: french
3 | ---
4 |
5 | # flaubert-base-uncased-squad
6 |
7 | ## Description
8 |
9 | A baseline model for question-answering in French ([flaubert](https://github.com/getalp/Flaubert) model fine-tuned on the [French-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD))
10 |
11 | ## Training hyperparameters
12 |
13 | ```shell
14 | python3 ./examples/question-answering/run_squad.py \
15 | --model_type flaubert \
16 | --model_name_or_path flaubert-base-uncased \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \
21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3
29 | ```
30 |
31 | ## Evaluation results
32 |
33 | ```shell
34 | {"f1": 68.66174806561969, "exact_match": 49.299692063176714}
35 | ```
36 |
37 | ## Usage
38 |
39 | ```python
40 | from transformers import pipeline
41 |
42 | nlp = pipeline('question-answering', model='fmikaelian/flaubert-base-uncased-squad', tokenizer='fmikaelian/flaubert-base-uncased-squad')
43 |
44 | nlp({
45 | 'question': "Qui est Claude Monet?",
46 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
47 | })
48 | ```
--------------------------------------------------------------------------------
/model_cards/ixa-ehu/berteus-base-cased/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - basque
4 | ---
5 |
6 | # BERTeus base cased
7 |
8 | This is the Basque language pretrained model presented in [Give your Text Representation Models some Love: the Case for Basque](https://arxiv.org/pdf/2004.00033.pdf). This model has been trained on a Basque corpus comprising Basque crawled news articles from online newspapers and the Basque Wikipedia. The training corpus contains 224.6 million tokens, of which 35 million come from Wikipedia.
9 |
10 | BERTeus has been tested on four different downstream tasks for Basque: part-of-speech (POS) tagging, named entity recognition (NER), sentiment analysis and topic classification, improving the state of the art for all four tasks. A summary of the results is given below:
11 |
12 |
13 | | Downstream task | BERTeus | mBERT | Previous SOTA |
14 | | --------------- | ------- | ------| ------------- |
15 | | Topic Classification | **76.77** | 68.42 | 63.00 |
16 | | Sentiment | **78.10** | 71.02 | 74.02 |
17 | | POS | **97.76** | 96.37 | 96.10 |
18 | | NER | **87.06** | 81.52 | 76.72 |
19 |
20 |
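As a quick usage reference, a minimal loading sketch (the model id is assumed from this card's path; the example sentence is only illustrative):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ixa-ehu/berteus-base-cased")
model = AutoModel.from_pretrained("ixa-ehu/berteus-base-cased")

# Encode a Basque sentence and compute contextual representations
input_ids = tokenizer.encode("Kaixo, mundua!", return_tensors="pt")
outputs = model(input_ids)
```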
21 | If using this model, please cite the following paper:
22 | ```
23 | @inproceedings{agerri2020give,
24 | title={Give your Text Representation Models some Love: the Case for Basque},
25 | author={Rodrigo Agerri and I{\~n}aki San Vicente and Jon Ander Campos and Ander Barrena and Xabier Saralegi and Aitor Soroa and Eneko Agirre},
26 | booktitle={Proceedings of the 12th International Conference on Language Resources and Evaluation},
27 | year={2020}
28 | }
29 | ```
30 |
--------------------------------------------------------------------------------
/examples/adversarial/README.md:
--------------------------------------------------------------------------------
1 | ## Adversarial evaluation of model performances
2 |
3 | Here is an example of evaluating a model using adversarial evaluation of natural language inference with the Heuristic Analysis for NLI Systems (HANS) dataset [McCoy et al., 2019](https://arxiv.org/abs/1902.01007). The example was graciously provided by [Nafise Sadat Moosavi](https://github.com/ns-moosavi).
4 |
5 | The HANS dataset can be downloaded from [this location](https://github.com/tommccoy1/hans).
6 |
7 | This is an example of using test_hans.py:
8 |
9 | ```bash
10 | export HANS_DIR=path-to-hans
11 | export MODEL_TYPE=type-of-the-model-e.g.-bert-roberta-xlnet-etc
12 | export MODEL_PATH=path-to-the-model-directory-that-is-trained-on-NLI-e.g.-by-using-run_glue.py
13 |
14 | python examples/hans/test_hans.py \
15 | --task_name hans \
16 | --model_type $MODEL_TYPE \
17 | --do_eval \
18 | --data_dir $HANS_DIR \
19 | --model_name_or_path $MODEL_PATH \
20 | --max_seq_length 128 \
21 | --output_dir $MODEL_PATH
22 | ```
23 |
24 | This will create the hans_predictions.txt file in MODEL_PATH, which can then be evaluated using hans/evaluate_heur_output.py from the HANS dataset.
25 |
26 | The results of the BERT-base model trained on MNLI with batch size 8 and random seed 42, evaluated on the HANS dataset, are as follows:
27 |
28 | ```bash
29 | Heuristic entailed results:
30 | lexical_overlap: 0.9702
31 | subsequence: 0.9942
32 | constituent: 0.9962
33 |
34 | Heuristic non-entailed results:
35 | lexical_overlap: 0.199
36 | subsequence: 0.0396
37 | constituent: 0.118
38 | ```
39 |
--------------------------------------------------------------------------------
/model_cards/activebus/BERT-DK_laptop/README.md:
--------------------------------------------------------------------------------
1 | # ReviewBERT
2 |
3 | BERT (post-)trained from a review corpus to understand sentiment, opinions and various e-commerce aspects.
4 |
5 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`.
6 |
7 |
8 | ## Model Description
9 |
10 | The original model is from `BERT-base-uncased` trained from Wikipedia+BookCorpus.
11 | Models are post-trained from [Amazon Dataset](http://jmcauley.ucsd.edu/data/amazon/) and [Yelp Dataset](https://www.yelp.com/dataset/challenge/).
12 |
13 | `BERT-DK_laptop` is trained from 100MB laptop corpus under `Electronics/Computers & Accessories/Laptops`.
14 |
15 | ## Instructions
16 | Loading the post-trained weights is as simple as, e.g.:
17 |
18 | ```python
19 | import torch
20 | from transformers import AutoModel, AutoTokenizer
21 |
22 | tokenizer = AutoTokenizer.from_pretrained("activebus/BERT-DK_laptop")
23 | model = AutoModel.from_pretrained("activebus/BERT-DK_laptop")
24 |
25 | ```
26 |
27 |
28 | ## Evaluation Results
29 |
30 | Check our [NAACL paper](https://www.aclweb.org/anthology/N19-1242.pdf)
31 |
32 |
33 | ## Citation
34 | If you find this work useful, please cite as follows.
35 | ```
36 | @inproceedings{xu_bert2019,
37 | title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
38 | author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
39 | booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
40 | month = "jun",
41 | year = "2019",
42 | }
43 | ```
44 |
--------------------------------------------------------------------------------
/adapter_docs/classes/roberta.rst:
--------------------------------------------------------------------------------
1 | RoBERTa
2 | ========
3 |
4 | The RoBERTa model was proposed in `RoBERTa: A Robustly Optimized BERT Pretraining Approach <https://arxiv.org/abs/1907.11692>`_
5 | by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer,
6 | Veselin Stoyanov. It is based on Google's BERT model released in 2018.
7 |
8 | .. note::
9 | This class is nearly identical to the PyTorch implementation of RoBERTa in Huggingface Transformers.
10 | For more information, visit `the corresponding section in their documentation `_.
11 |
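As a quick reference, a minimal loading sketch (the ``roberta-base`` checkpoint name is illustrative):

.. code-block:: python

    from transformers import RobertaModel, RobertaTokenizer

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaModel.from_pretrained("roberta-base")

    input_ids = tokenizer.encode("Hello world", return_tensors="pt")
    outputs = model(input_ids)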
12 | RobertaConfig
13 | ~~~~~~~~~~~~~~~~~~~~~
14 |
15 | .. autoclass:: transformers.RobertaConfig
16 | :members:
17 |
18 |
19 | RobertaTokenizer
20 | ~~~~~~~~~~~~~~~~~~~~~
21 |
22 | .. autoclass:: transformers.RobertaTokenizer
23 | :members: build_inputs_with_special_tokens, get_special_tokens_mask,
24 | create_token_type_ids_from_sequences, save_vocabulary
25 |
26 |
27 | RobertaModel
28 | ~~~~~~~~~~~~~~~~~~~~
29 |
30 | .. autoclass:: transformers.RobertaModel
31 | :members:
32 |
33 |
34 | RobertaForMaskedLM
35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
36 |
37 | .. autoclass:: transformers.RobertaForMaskedLM
38 | :members:
39 |
40 |
41 | RobertaForSequenceClassification
42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 |
44 | .. autoclass:: transformers.RobertaForSequenceClassification
45 | :members:
46 |
47 |
48 | RobertaForTokenClassification
49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
50 |
51 | .. autoclass:: transformers.RobertaForTokenClassification
52 | :members:
53 |
--------------------------------------------------------------------------------
/model_cards/ahotrod/roberta_large_squad2/README.md:
--------------------------------------------------------------------------------
1 | ## RoBERTa-large language model fine-tuned on SQuAD2.0
2 |
3 | ### with the following results:
4 |
5 | ```
6 | "exact": 84.46896319380106,
7 | "f1": 87.85388093408943,
8 | "total": 11873,
9 | "HasAns_exact": 81.37651821862349,
10 | "HasAns_f1": 88.1560607844881,
11 | "HasAns_total": 5928,
12 | "NoAns_exact": 87.55256518082422,
13 | "NoAns_f1": 87.55256518082422,
14 | "NoAns_total": 5945,
15 | "best_exact": 84.46896319380106,
16 | "best_exact_thresh": 0.0,
17 | "best_f1": 87.85388093408929,
18 | "best_f1_thresh": 0.0
19 | ```
20 | ### from script:
21 | ```
22 | python ${EXAMPLES}/run_squad.py \
23 | --model_type roberta \
24 | --model_name_or_path roberta-large \
25 | --do_train \
26 | --do_eval \
27 | --train_file ${SQUAD}/train-v2.0.json \
28 | --predict_file ${SQUAD}/dev-v2.0.json \
29 | --version_2_with_negative \
30 | --do_lower_case \
31 | --num_train_epochs 3 \
32 | --warmup_steps 1642 \
33 | --weight_decay 0.01 \
34 | --learning_rate 3e-5 \
35 | --adam_epsilon 1e-6 \
36 | --max_seq_length 512 \
37 | --doc_stride 128 \
38 | --per_gpu_train_batch_size 8 \
39 | --gradient_accumulation_steps 6 \
40 | --per_gpu_eval_batch_size 48 \
41 | --threads 12 \
42 | --logging_steps 50 \
43 | --save_steps 2000 \
44 | --overwrite_output_dir \
45 | --output_dir ${MODEL_PATH}
46 | $@
47 | ```
48 | ### using the following system & software:
49 | ```
50 | Transformers: 2.7.0
51 | PyTorch: 1.4.0
52 | TensorFlow: 2.1.0
53 | Python: 3.7.7
54 | OS/Platform: Linux-5.3.0-46-generic-x86_64-with-debian-buster-sid
55 | CPU/GPU: Intel i9-9900K / NVIDIA Titan RTX 24GB
56 | ```
57 |
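### example usage:

A minimal inference sketch with the `question-answering` pipeline (the model id is assumed from this card's path; question and context are illustrative):

```python
from transformers import pipeline

nlp = pipeline(
    "question-answering",
    model="ahotrod/roberta_large_squad2",
    tokenizer="ahotrod/roberta_large_squad2"
)

nlp({
    'question': "Which dataset was the model fine-tuned on?",
    'context': "RoBERTa-large was fine-tuned on the SQuAD2.0 dataset for extractive question answering."
})
```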
--------------------------------------------------------------------------------
/model_cards/illuin/camembert-large-fquad/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: french
3 | ---
4 |
5 | # camembert-large-fquad
6 |
7 | ## Description
8 |
9 | A native French Question Answering model [CamemBERT-large](https://camembert-model.fr/) fine-tuned on [FQuAD](https://fquad.illuin.tech/).
10 |
11 | ## FQuAD Leaderboard and evaluation scores
12 |
13 | The results of Camembert-large-fquad can be compared with other state-of-the-art models of the [FQuAD Leaderboard](https://illuin-tech.github.io/FQuAD-explorer/).
14 |
15 | On the test set the model scores,
16 |
17 | ```shell
18 | {"f1": 91.5, "exact_match": 82.0}
19 | ```
20 |
21 | On the development set the model scores,
22 |
23 | ```shell
24 | {"f1": 91.0, "exact_match": 81.2}
25 | ```
26 |
27 | Note: You can also explore the results of the model on [FQuAD-Explorer](https://illuin-tech.github.io/FQuAD-explorer/)!
28 |
29 | ## Usage
30 |
31 | ```python
32 | from transformers import pipeline
33 |
34 | nlp = pipeline('question-answering', model='illuin/camembert-large-fquad', tokenizer='illuin/camembert-large-fquad')
35 |
36 | nlp({
37 | 'question': "Qui est Claude Monet?",
38 | 'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
39 | })
40 | ```
41 |
42 | ## Citation
43 |
44 | If you use our work, please cite:
45 |
46 | ```bibtex
47 | @article{dHoffschmidt2020FQuADFQ,
48 | title={FQuAD: French Question Answering Dataset},
49 | author={Martin d'Hoffschmidt and Maxime Vidal and Wacim Belblidia and Tom Brendl'e and Quentin Heinrich},
50 | journal={ArXiv},
51 | year={2020},
52 | volume={abs/2002.06071}
53 | }
54 | ```
55 |
--------------------------------------------------------------------------------
/src/transformers/configuration_camembert.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """ CamemBERT configuration """
17 |
18 |
19 | import logging
20 |
21 | from .configuration_roberta import RobertaConfig
22 |
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
27 |     "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json",
28 |     "umberto-commoncrawl-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-commoncrawl-cased-v1/config.json",
29 |     "umberto-wikipedia-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-wikipedia-uncased-v1/config.json",
30 | }
31 |
32 |
33 | class CamembertConfig(RobertaConfig):
34 |     """
35 |     This class overrides :class:`~transformers.RobertaConfig`. Please check the
36 |     superclass for the appropriate documentation alongside usage examples.
37 |     """
38 |
39 |     model_type = "camembert"
40 |
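A small usage sketch for the configuration class defined above (the checkpoint name is taken from the archive map; loading it requires network access):

```python
from transformers import CamembertConfig

# Fetch and instantiate the configuration of the "camembert-base" checkpoint
config = CamembertConfig.from_pretrained("camembert-base")
print(config.model_type)  # "camembert"
```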
--------------------------------------------------------------------------------
/src/transformers/configuration_mmbt.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | # Copyright (c) HuggingFace Inc. team.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """ MMBT configuration """
17 |
18 |
19 | import logging
20 |
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | class MMBTConfig(object):
26 |     """Configuration class to store the configuration of a `MMBT Model`.
27 |
28 |     Args:
29 |         config (:obj:`~transformers.PreTrainedConfig`):
30 |             Config of the underlying Transformer models. Its values are
31 |             copied over to use a single config.
32 |         num_labels (:obj:`int` or :obj:`None`, optional, defaults to `None`):
33 |             Size of final Linear layer for classification.
34 |         modal_hidden_size (:obj:`int`, optional, defaults to 2048):
35 |             Embedding dimension of the non-text modality encoder.
36 |     """
37 |
38 |     def __init__(self, config, num_labels=None, modal_hidden_size=2048):
39 |         self.__dict__ = config.__dict__
40 |         self.modal_hidden_size = modal_hidden_size
41 |         if num_labels:
42 |             self.num_labels = num_labels
43 |
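A small construction sketch for the class above (the wrapped `BertConfig` and the label count are illustrative choices):

```python
from transformers import BertConfig
from transformers.configuration_mmbt import MMBTConfig

# Wrap an existing transformer config and add the multimodal-specific fields
bert_config = BertConfig()
mmbt_config = MMBTConfig(bert_config, num_labels=2, modal_hidden_size=2048)
print(mmbt_config.modal_hidden_size, mmbt_config.num_labels)  # 2048 2
```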
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F41B Bug Report"
3 | about: Submit a bug report to help us improve adapter-transformers
4 | title: ''
5 | labels: 'bug'
6 | assignees: ''
7 |
8 | ---
9 |
10 | # 🐛 Bug
11 |
12 | ## Information
13 |
14 | Model I am using (Bert, XLNet ...):
15 |
16 | Language I am using the model on (English, Chinese ...):
17 |
18 | Adapter setup I am using (if any):
19 |
20 | The problem arises when using:
21 | * [ ] the official example scripts: (give details below)
22 | * [ ] my own modified scripts: (give details below)
23 |
24 | The tasks I am working on is:
25 | * [ ] an official GLUE/SQUaD task: (give the name)
26 | * [ ] my own task or dataset: (give details below)
27 |
28 | ## To reproduce
29 |
30 | Steps to reproduce the behavior:
31 |
32 | 1.
33 | 2.
34 | 3.
35 |
36 |
39 |
40 | ## Expected behavior
41 |
42 |
43 |
44 | ## Environment info
45 |
47 |
48 | - `transformers` version:
49 | - Platform:
50 | - Python version:
51 | - PyTorch version (GPU?):
52 | - Tensorflow version (GPU?):
53 | - Using GPU in script?:
54 | - Using distributed or parallel set-up in script?:
55 |
--------------------------------------------------------------------------------
/examples/multiple-choice/README.md:
--------------------------------------------------------------------------------
1 | ## Multiple Choice
2 |
3 | Based on the script [`run_multiple_choice.py`]().
4 |
5 | #### Fine-tuning on SWAG
6 | Download [swag](https://github.com/rowanz/swagaf/tree/master/data) data
7 |
8 | ```bash
9 | # training on 4 Tesla V100 (16GB) GPUs
10 | export SWAG_DIR=/path/to/swag_data_dir
11 | python ./examples/multiple-choice/run_multiple_choice.py \
12 | --task_name swag \
13 | --model_name_or_path roberta-base \
14 | --do_train \
15 | --do_eval \
16 | --data_dir $SWAG_DIR \
17 | --learning_rate 5e-5 \
18 | --num_train_epochs 3 \
19 | --max_seq_length 80 \
20 | --output_dir models_bert/swag_base \
21 | --per_gpu_eval_batch_size=16 \
22 | --per_device_train_batch_size=16 \
23 | --gradient_accumulation_steps 2 \
24 | --overwrite_output
25 | ```
26 | Training with the defined hyper-parameters yields the following results:
27 | ```
28 | ***** Eval results *****
29 | eval_acc = 0.8338998300509847
30 | eval_loss = 0.44457291918821606
31 | ```
32 |
33 |
34 | ## Tensorflow
35 |
36 | ```bash
37 | export SWAG_DIR=/path/to/swag_data_dir
38 | python ./examples/multiple-choice/run_tf_multiple_choice.py \
39 | --task_name swag \
40 | --model_name_or_path bert-base-cased \
41 | --do_train \
42 | --do_eval \
43 | --data_dir $SWAG_DIR \
44 | --learning_rate 5e-5 \
45 | --num_train_epochs 3 \
46 | --max_seq_length 80 \
47 | --output_dir models_bert/swag_base \
48 | --per_gpu_eval_batch_size=16 \
49 | --per_device_train_batch_size=16 \
50 | --logging_dir logs \
51 | --gradient_accumulation_steps 2 \
52 | --overwrite_output
53 | ```
54 |
55 | ## Run it in colab
56 | [](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb)
57 |
--------------------------------------------------------------------------------
/tests/test_tokenization_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 HuggingFace Inc..
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import unittest
18 |
19 | from transformers import PreTrainedTokenizer
20 | from transformers.tokenization_gpt2 import GPT2Tokenizer
21 |
22 | from .utils import slow
23 |
24 |
25 | class TokenizerUtilsTest(unittest.TestCase):
26 |     def check_tokenizer_from_pretrained(self, tokenizer_class):
27 |         s3_models = list(tokenizer_class.max_model_input_sizes.keys())
28 |         for model_name in s3_models[:1]:
29 |             tokenizer = tokenizer_class.from_pretrained(model_name)
30 |             self.assertIsNotNone(tokenizer)
31 |             self.assertIsInstance(tokenizer, tokenizer_class)
32 |             self.assertIsInstance(tokenizer, PreTrainedTokenizer)
33 |
34 |             for special_tok in tokenizer.all_special_tokens:
35 |                 self.assertIsInstance(special_tok, str)
36 |                 special_tok_id = tokenizer.convert_tokens_to_ids(special_tok)
37 |                 self.assertIsInstance(special_tok_id, int)
38 |
39 |     @slow
40 |     def test_pretrained_tokenizers(self):
41 |         self.check_tokenizer_from_pretrained(GPT2Tokenizer)
42 |
--------------------------------------------------------------------------------
/model_cards/valhalla/t5-base-squad/README.md:
--------------------------------------------------------------------------------
1 | # T5 for question-answering
2 | This is a T5-base model fine-tuned on SQuAD 1.1 for QA using a text-to-text approach.
3 |
4 | ## Model training
5 | This model was trained on a Colab TPU with 35GB RAM for 4 epochs.
6 |
7 | ## Results:
8 | | Metric | #Value |
9 | |-------------|---------|
10 | | Exact Match | 81.5610 |
11 | | F1 | 89.9601 |
12 |
13 | ## Model in Action 🚀
14 | ```python
15 | from transformers import AutoModelWithLMHead, AutoTokenizer
16 |
17 | tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-squad")
18 | model = AutoModelWithLMHead.from_pretrained("valhalla/t5-base-squad")
19 |
20 | def get_answer(question, context):
21 |     input_text = "question: %s context: %s " % (question, context)
22 |     features = tokenizer.batch_encode_plus([input_text], return_tensors='pt')
23 |
24 |     out = model.generate(input_ids=features['input_ids'],
25 |                          attention_mask=features['attention_mask'])
26 |
27 |     return tokenizer.decode(out[0])
28 |
29 | context = "In Norse mythology, Valhalla is a majestic, enormous hall located in Asgard, ruled over by the god Odin."
30 | question = "What is Valhalla ?"
31 |
32 | get_answer(question, context)
33 | # output: 'a majestic, enormous hall located in Asgard, ruled over by the god Odin'
34 | ```
35 | Play with this model [](https://colab.research.google.com/drive/1a5xpJiUjZybfU9Mi-aDkOp116PZ9-wni?usp=sharing)
36 |
37 | > Created by Suraj Patil [](https://github.com/patil-suraj/)
38 | [](https://twitter.com/psuraj28)
39 |
--------------------------------------------------------------------------------
/model_cards/Tereveni-AI/gpt2-124M-uk-fiction/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: ukrainian
3 | ---
4 |
5 | Note: **default code snippet above won't work** because we are using `AlbertTokenizer` with `GPT2LMHeadModel`, see [issue](https://github.com/huggingface/transformers/issues/4285).
6 |
7 | ## GPT2 124M Trained on Ukrainian Fiction
8 |
9 | ### Training details
10 |
11 | The model was trained on a corpus of 4,040 fiction books, 2.77 GiB in total.
12 | Evaluation on [brown-uk](https://github.com/brown-uk/corpus) gives perplexity of 50.16.
13 |
14 | ### Example usage:
15 | ```python
16 | from transformers import AlbertTokenizer, GPT2LMHeadModel
17 |
18 | tokenizer = AlbertTokenizer.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction")
19 | model = GPT2LMHeadModel.from_pretrained("Tereveni-AI/gpt2-124M-uk-fiction")
20 |
21 | input_ids = tokenizer.encode("Но зла Юнона, суча дочка,", add_special_tokens=False, return_tensors='pt')
22 |
23 | outputs = model.generate(
24 | input_ids,
25 | do_sample=True,
26 | num_return_sequences=3,
27 | max_length=50
28 | )
29 |
30 | for i, out in enumerate(outputs):
31 |     print("{}: {}".format(i, tokenizer.decode(out)))
32 | ```
33 |
34 | Prints something like this:
35 | ```bash
36 | 0: Но зла Юнона, суча дочка, яка затьмарила всі її таємниці: І хто з'їсть її душу, той помре». І, не дочекавшись гніву богів, посунула в пітьму, щоб не бачити перед собою. Але, за
37 | 1: Но зла Юнона, суча дочка, і довела мене до божевілля. Але він не знав нічого. Після того як я його побачив, мені стало зле. Я втратив рівновагу. Але в мене не було часу на роздуми. Я вже втратив надію
38 | 2: Но зла Юнона, суча дочка, не нарікала нам! — раптом вигукнула Юнона. — Це ти, старий йолопе! — мовила вона, не перестаючи сміятись. — Хіба ти не знаєш, що мені подобається ходити з тобою?
39 | ```
--------------------------------------------------------------------------------
/src/transformers/activations.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import math
3 |
4 | import torch
5 | import torch.nn.functional as F
6 |
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | def swish(x):
12 |     return x * torch.sigmoid(x)
13 |
14 |
15 | def _gelu_python(x):
16 |     """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
17 |         For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
18 |         0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
19 |         This is now written in C in torch.nn.functional
20 |         Also see https://arxiv.org/abs/1606.08415
21 |     """
22 |     return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
23 |
24 |
25 | def gelu_new(x):
26 |     """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
27 |         Also see https://arxiv.org/abs/1606.08415
28 |     """
29 |     return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
30 |
31 |
32 | if torch.__version__ < "1.4.0":
33 |     gelu = _gelu_python
34 | else:
35 |     gelu = F.gelu
36 |
37 |
38 | def gelu_fast(x):
39 |     return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))
40 |
41 |
42 | ACT2FN = {
43 |     "relu": F.relu,
44 |     "swish": swish,
45 |     "gelu": gelu,
46 |     "tanh": torch.tanh,
47 |     "gelu_new": gelu_new,
48 |     "gelu_fast": gelu_fast,
49 | }
50 |
51 |
52 | def get_activation(activation_string):
53 |     if activation_string in ACT2FN:
54 |         return ACT2FN[activation_string]
55 |     else:
56 |         raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
57 |
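A small usage sketch for the `ACT2FN` mapping and `get_activation` helper above (input values are illustrative):

```python
import torch

from transformers.activations import get_activation

# Look up an activation by name and apply it to a tensor
act = get_activation("gelu_new")
print(act(torch.tensor([-1.0, 0.0, 1.0])))

# Unknown names raise a KeyError that lists the available activations
try:
    get_activation("mish")
except KeyError as err:
    print(err)
```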
--------------------------------------------------------------------------------
/.github/workflows/tests_torch.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 |   push:
5 |     branches: [ 'master' ]
6 |     paths:
7 |       - 'src/**'
8 |       - 'examples/**'
9 |       - 'templates/**'
10 |       - 'tests/**'
11 |       - 'utils/**'
12 |   pull_request:
13 |     branches: [ 'master' ]
14 |     paths:
15 |       - 'src/**'
16 |       - 'examples/**'
17 |       - 'templates/**'
18 |       - 'tests/**'
19 |       - 'utils/**'
20 |
21 | jobs:
22 |   check_code_quality:
23 |     runs-on: ubuntu-latest
24 |     steps:
25 |       - uses: actions/checkout@v2
26 |       - uses: actions/setup-python@v2
27 |         with:
28 |           python-version: 3.6
29 |       - uses: actions/cache@v2
30 |         with:
31 |           path: ~/.cache/pip
32 |           key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
33 |           restore-keys: |
34 |             ${{ runner.os }}-pip-
35 |       - name: Install
36 |         run: |
37 |           pip install .[torch,quality]
38 |           pip uninstall isort -y
39 |           pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
40 |       - name: Check Quality
41 |         run: |
42 |           make quality
43 |   run_reduced_tests_torch:
44 |     runs-on: ubuntu-latest
45 |     steps:
46 |       - uses: actions/checkout@v2
47 |       - uses: actions/setup-python@v2
48 |         with:
49 |           python-version: 3.6
50 |       - uses: actions/cache@v2
51 |         with:
52 |           path: ~/.cache/pip
53 |           key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
54 |           restore-keys: |
55 |             ${{ runner.os }}-pip-
56 |       - name: Install
57 |         run: |
58 |           pip install .[sklearn,torch,testing]
59 |       - name: Test
60 |         run: |
61 |           make test-reduced
62 |
--------------------------------------------------------------------------------
/model_cards/lvwerra/gpt2-imdb-ctrl/README.md:
--------------------------------------------------------------------------------
1 | # GPT2-IMDB-ctrl
2 |
3 | ## What is it?
4 | A small GPT2 (`lvwerra/gpt2-imdb`) language model fine-tuned to produce controlled movie reviews based on the [IMDB dataset](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). The model is trained with rewards from a BERT sentiment classifier (`lvwerra/bert-imdb`) via PPO.
5 |
6 | ## Training setting
7 | The model was trained for `200` optimisation steps with a batch size of `256` which corresponds to `51200` training samples. The full experiment setup can be found in the Jupyter notebook in the [trl repo](https://lvwerra.github.io/trl/05-gpt2-sentiment-ppo-training/). The strings `"[negative]"`, `"[neutral]"`, and `"[positive]"` are added at the beginning of the query to control the sentiment.
8 |
9 | ## Examples
10 | A few examples of the model response to a query before and after optimisation:
11 |
12 | | query | response [negative] | rewards [negative] | response [neutral] | rewards [neutral] | response [positive] | rewards [positive] |
13 | |-------|---------------------|--------------------|--------------------|-------------------|---------------------|--------------------|
14 | |I watched this movie when|it was released and was awful. Little bit of ...|3.130034|it was released and it was the first movie I ...|-1.351991|I was younger it was wonderful. The new play ...|4.232218|
15 | |I can remember seeing this|movie in 2008, and I was so disappointed...yo...|3.428725|in support groups, which I think was not as i...|0.213288|movie, and it is one of my favorite movies ev...|4.168838|
16 | |This 1970 hit film has|little resonance. This movie is bad, not only...|4.241872|a bit of Rocket power.783287. It can be easil...|0.849278|the best formula for comedy and is't just jus...|4.208804|
17 |
18 |
19 |
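## Example usage
A minimal generation sketch, assuming the checkpoint is a standard GPT-2 model hosted under `lvwerra/gpt2-imdb-ctrl` and that the control strings are plain text prefixes as described above (sampling settings are illustrative):

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("lvwerra/gpt2-imdb-ctrl")
model = GPT2LMHeadModel.from_pretrained("lvwerra/gpt2-imdb-ctrl")

# Prepend a control string to steer the sentiment of the continuation
prompt = "[positive] I watched this movie when"
input_ids = tokenizer.encode(prompt, return_tensors="pt")
output = model.generate(input_ids, do_sample=True, max_length=40, top_k=50)
print(tokenizer.decode(output[0]))
```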
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_base/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_base
7 |
8 | This is an albert_chinese_base model from [Google's github](https://github.com/google-research/ALBERT),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_base model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_base 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_base'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `感 0.36333346366882324`
45 |
--------------------------------------------------------------------------------
/docs/source/model_doc/auto.rst:
--------------------------------------------------------------------------------
1 | AutoModels
2 | -----------
3 |
4 | In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you are supplying to the ``from_pretrained`` method.
5 |
6 | AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path to the pretrained weights/config/vocabulary:
7 |
8 | Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create an instance of the relevant architecture (e.g. ``model = AutoModel.from_pretrained('bert-base-cased')`` will create an instance of ``BertModel``).
9 |
10 |
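For example, the following minimal sketch (the checkpoint name is purely illustrative) shows all three classes resolving to their BERT counterparts:

.. code-block:: python

    from transformers import AutoConfig, AutoModel, AutoTokenizer

    # Each Auto class inspects the name/path and returns the matching concrete class
    config = AutoConfig.from_pretrained("bert-base-cased")        # -> BertConfig
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # -> BertTokenizer
    model = AutoModel.from_pretrained("bert-base-cased")          # -> BertModel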
11 | ``AutoConfig``
12 | ~~~~~~~~~~~~~~~~~~~~~
13 |
14 | .. autoclass:: transformers.AutoConfig
15 | :members:
16 |
17 |
18 | ``AutoTokenizer``
19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 |
21 | .. autoclass:: transformers.AutoTokenizer
22 | :members:
23 |
24 |
25 | ``AutoModel``
26 | ~~~~~~~~~~~~~~~~~~~~~
27 |
28 | .. autoclass:: transformers.AutoModel
29 | :members:
30 |
31 |
32 | ``AutoModelForPreTraining``
33 | ~~~~~~~~~~~~~~~~~~~~~
34 |
35 | .. autoclass:: transformers.AutoModelForPreTraining
36 | :members:
37 |
38 |
39 | ``AutoModelWithLMHead``
40 | ~~~~~~~~~~~~~~~~~~~~~
41 |
42 | .. autoclass:: transformers.AutoModelWithLMHead
43 | :members:
44 |
45 |
46 | ``AutoModelForSequenceClassification``
47 | ~~~~~~~~~~~~~~~~~~~~~
48 |
49 | .. autoclass:: transformers.AutoModelForSequenceClassification
50 | :members:
51 |
52 |
53 | ``AutoModelForQuestionAnswering``
54 | ~~~~~~~~~~~~~~~~~~~~~
55 |
56 | .. autoclass:: transformers.AutoModelForQuestionAnswering
57 | :members:
58 |
59 |
60 | ``AutoModelForTokenClassification``
61 | ~~~~~~~~~~~~~~~~~~~~~
62 |
63 | .. autoclass:: transformers.AutoModelForTokenClassification
64 | :members:
65 |
66 |
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_large/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_large
7 |
8 | This is an albert_chinese_large model from [Google's github](https://github.com/google-research/ALBERT),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_large model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_large 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_large'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `心 0.9422469735145569`
45 |
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_xlarge/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_xlarge
7 |
8 | This is an albert_chinese_xlarge model from [Google's github](https://github.com/google-research/ALBERT),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_xlarge model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_xlarge 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_xlarge'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `心 0.9942440390586853`
45 |
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_xxlarge/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_xxlarge
7 |
8 | This is an albert_chinese_xxlarge model from [Google's github](https://github.com/google-research/ALBERT),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_xxlarge model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_xxlarge 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_xxlarge'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `心 0.995713472366333`
45 |
--------------------------------------------------------------------------------
/examples/translation/t5/test_t5_examples.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 | import tempfile
4 | import unittest
5 | from pathlib import Path
6 | from unittest.mock import patch
7 |
8 | from .evaluate_wmt import run_generate
9 |
10 |
11 | text = ["When Liana Barrientos was 23 years old, she got married in Westchester County."]
12 | translation = ["Als Liana Barrientos 23 Jahre alt war, heiratete sie in Westchester County."]
13 |
14 | output_file_name = "output_t5_trans.txt"
15 | score_file_name = "score_t5_trans.txt"
16 |
17 | logging.basicConfig(level=logging.DEBUG)
18 |
19 | logger = logging.getLogger()
20 |
21 |
22 | class TestT5Examples(unittest.TestCase):
23 |     def test_t5_cli(self):
24 |         stream_handler = logging.StreamHandler(sys.stdout)
25 |         logger.addHandler(stream_handler)
26 |
27 |         tmp_source = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.hypo"
28 |         with tmp_source.open("w") as f:
29 |             f.write("\n".join(text))
30 |
31 |         tmp_target = Path(tempfile.gettempdir()) / "utest_generations_t5_trans.target"
32 |         with tmp_target.open("w") as f:
33 |             f.write("\n".join(translation))
34 |
35 |         output_file_name = Path(tempfile.gettempdir()) / "utest_output_trans.hypo"
36 |         score_file_name = Path(tempfile.gettempdir()) / "utest_score.hypo"
37 |
38 |         testargs = [
39 |             "evaluate_wmt.py",
40 |             "patrickvonplaten/t5-tiny-random",
41 |             str(tmp_source),
42 |             str(output_file_name),
43 |             str(tmp_target),
44 |             str(score_file_name),
45 |         ]
46 |
47 |         with patch.object(sys, "argv", testargs):
48 |             run_generate()
49 |             self.assertTrue(Path(output_file_name).exists())
50 |             self.assertTrue(Path(score_file_name).exists())
51 |
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_tiny/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_tiny
7 |
8 | This is an albert_chinese_tiny model (albert_tiny_google_zh) from the [brightmart/albert_zh project](https://github.com/brightmart/albert_zh),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_tiny model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_tiny 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_tiny'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `感 0.40312355756759644`
45 |
--------------------------------------------------------------------------------
/model_cards/voidful/albert_chinese_small/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - chinese
4 | ---
5 |
6 | # albert_chinese_small
7 |
8 | This is an albert_chinese_small model (albert_small_google_zh) from the [brightmart/albert_zh project](https://github.com/brightmart/albert_zh),
9 | converted by huggingface's [script](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py)
10 |
11 | ## Attention (注意)
12 |
13 | Since sentencepiece is not used in the albert_chinese_small model,
14 | you have to call BertTokenizer instead of AlbertTokenizer !!!
15 | We can evaluate it with a MaskedLM example.
16 |
17 | 由於 albert_chinese_small 模型沒有用 sentencepiece
18 | 用AlbertTokenizer會載不進詞表,因此需要改用BertTokenizer !!!
19 | 我們可以跑MaskedLM預測來驗證這個做法是否正確
20 |
21 | ## Justify (驗證有效性)
22 | [colab trial](https://colab.research.google.com/drive/1Wjz48Uws6-VuSHv_-DcWLilv77-AaYgj)
23 | ```python
24 | from transformers import *
25 | import torch
26 | from torch.nn.functional import softmax
27 |
28 | pretrained = 'voidful/albert_chinese_small'
29 | tokenizer = BertTokenizer.from_pretrained(pretrained)
30 | model = AlbertForMaskedLM.from_pretrained(pretrained)
31 |
32 | inputtext = "今天[MASK]情很好"
33 |
34 | maskpos = tokenizer.encode(inputtext, add_special_tokens=True).index(103)
35 |
36 | input_ids = torch.tensor(tokenizer.encode(inputtext, add_special_tokens=True)).unsqueeze(0) # Batch size 1
37 | outputs = model(input_ids, masked_lm_labels=input_ids)
38 | loss, prediction_scores = outputs[:2]
39 | logit_prob = softmax(prediction_scores[0, maskpos]).data.tolist()
40 | predicted_index = torch.argmax(prediction_scores[0, maskpos]).item()
41 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
42 | print(predicted_token,logit_prob[predicted_index])
43 | ```
44 | Result: `感 0.6390823125839233`
45 |
--------------------------------------------------------------------------------
/model_cards/jplu/tf-xlm-roberta-base/README.md:
--------------------------------------------------------------------------------
1 | # Tensorflow XLM-RoBERTa
2 |
3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow.
4 |
5 | ## XLM-RoBERTa
6 |
7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross-lingual sentence encoder. It is trained on 2.5TB of data across 100 languages, filtered from Common Crawl. XLM-R achieves state-of-the-art results on multiple cross-lingual benchmarks.
8 |
9 | ## Model Weights
10 |
11 | | Model | Downloads
12 | | -------------------------------- | ---------------------------------------------------------------------------------------------------------------
13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5)
14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5)
15 |
16 | ## Usage
17 |
18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like:
19 |
20 | ```python
21 | from transformers import TFXLMRobertaModel
22 |
23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base")
24 | ```
25 | Or
26 | ```python
27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large")
28 | ```
29 |
30 | ## Huggingface model hub
31 |
32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu).
33 |
34 | ## Acknowledgments
35 |
36 | Thanks to all the Huggingface team for the support and their amazing library!
37 |
--------------------------------------------------------------------------------
/model_cards/jplu/tf-xlm-roberta-large/README.md:
--------------------------------------------------------------------------------
1 | # Tensorflow XLM-RoBERTa
2 |
3 | In this repository you will find different versions of the XLM-RoBERTa model for Tensorflow.
4 |
5 | ## XLM-RoBERTa
6 |
7 | [XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross-lingual sentence encoder. It is trained on 2.5TB of data across 100 languages, filtered from Common Crawl. XLM-R achieves state-of-the-art results on multiple cross-lingual benchmarks.
8 |
9 | ## Model Weights
10 |
11 | | Model | Downloads
12 | | -------------------------------- | ---------------------------------------------------------------------------------------------------------------
13 | | `jplu/tf-xlm-roberta-base` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-base/tf_model.h5)
14 | | `jplu/tf-xlm-roberta-large` | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-xlm-roberta-large/tf_model.h5)
15 |
16 | ## Usage
17 |
18 | With Transformers >= 2.4 the Tensorflow models of XLM-RoBERTa can be loaded like:
19 |
20 | ```python
21 | from transformers import TFXLMRobertaModel
22 |
23 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base")
24 | ```
25 | Or
26 | ```python
27 | model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large")
28 | ```
29 |
30 | ## Huggingface model hub
31 |
32 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu).
33 |
34 | ## Acknowledgments
35 |
36 | Thanks to all the Huggingface team for the support and their amazing library!
37 |
--------------------------------------------------------------------------------
/model_cards/allenai/biomed_roberta_base/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | thumbnail: https://huggingface.co/front/thumbnails/allenai.png
3 | ---
4 |
5 | # BioMed-RoBERTa-base
6 |
7 | BioMed-RoBERTa-base is a language model based on the RoBERTa-base (Liu et. al, 2019) architecture. We adapt RoBERTa-base to 2.68 million scientific papers from the [Semantic Scholar](https://www.semanticscholar.org) corpus via continued pretraining. This amounts to 7.55B tokens and 47GB of data. We use the full text of the papers in training, not just abstracts.
8 |
9 | Specific details of the adaptive pretraining procedure can be found in Gururangan et. al, 2020.
10 |
11 |
12 | ## Evaluation
13 |
14 | BioMed-RoBERTa achieves performance competitive with state-of-the-art models on a number of NLP tasks in the biomedical domain (numbers are mean (standard deviation) over 3+ random seeds).
15 |
16 |
17 | | Task | Task Type | RoBERTa-base | BioMed-RoBERTa-base |
18 | |--------------|---------------------|--------------|---------------------|
19 | | RCT-180K | Text Classification | 86.4 (0.3) | 86.9 (0.2) |
20 | | ChemProt | Relation Extraction | 81.1 (1.1) | 83.0 (0.7) |
21 | | JNLPBA | NER | 74.3 (0.2) | 75.2 (0.1) |
22 | | BC5CDR | NER | 85.6 (0.1) | 87.8 (0.1) |
23 | | NCBI-Disease | NER | 86.6 (0.3) | 87.1 (0.8) |
24 |
25 | More evaluations TBD.
26 |
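## Usage

A minimal loading sketch (the model id is assumed from this card's path; the example sentence is illustrative):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("allenai/biomed_roberta_base")
model = AutoModel.from_pretrained("allenai/biomed_roberta_base")

# Encode a biomedical sentence and compute contextual representations
input_ids = tokenizer.encode("Aspirin inhibits platelet aggregation.", return_tensors="pt")
outputs = model(input_ids)
```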
27 | ## Citation
28 |
29 | If using this model, please cite the following paper:
30 |
31 | ```bibtex
32 | @inproceedings{domains,
33 | author = {Suchin Gururangan and Ana Marasović and Swabha Swayamdipta and Kyle Lo and Iz Beltagy and Doug Downey and Noah A. Smith},
34 | title = {Don't Stop Pretraining: Adapt Language Models to Domains and Tasks},
35 | year = {2020},
36 | booktitle = {Proceedings of ACL},
37 | }
38 | ```
39 |
--------------------------------------------------------------------------------
/model_cards/google/electra-large-generator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: english
3 | thumbnail: https://huggingface.co/front/thumbnails/google.png
4 |
5 | license: apache-2.0
6 | ---
7 |
8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators
9 |
10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset.
11 |
12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB).
13 |
14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g., [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)).
15 |
16 | ## How to use the generator in `transformers`
17 |
18 | ```python
19 | from transformers import pipeline
20 |
21 | fill_mask = pipeline(
22 | "fill-mask",
23 | model="google/electra-large-generator",
24 | tokenizer="google/electra-large-generator"
25 | )
26 |
27 | print(
28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.")
29 | )
30 |
31 | ```
32 |
--------------------------------------------------------------------------------
/model_cards/google/electra-small-generator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: english
3 | thumbnail: https://huggingface.co/front/thumbnails/google.png
4 |
5 | license: apache-2.0
6 | ---
7 |
8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators
9 |
10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset.
11 |
12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB).
13 |
14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g., [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)).
15 |
16 | ## How to use the generator in `transformers`
17 |
18 | ```python
19 | from transformers import pipeline
20 |
21 | fill_mask = pipeline(
22 | "fill-mask",
23 | model="google/electra-small-generator",
24 | tokenizer="google/electra-small-generator"
25 | )
26 |
27 | print(
28 | fill_mask(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.")
29 | )
30 |
31 | ```
32 |
--------------------------------------------------------------------------------
/model_cards/google/electra-base-generator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: english
3 | thumbnail: https://huggingface.co/front/thumbnails/google.png
4 |
5 | license: apache-2.0
6 | ---
7 |
8 | ## ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators
9 |
10 | **ELECTRA** is a new method for self-supervised language representation learning. It can be used to pre-train transformer networks using relatively little compute. ELECTRA models are trained to distinguish "real" input tokens vs "fake" input tokens generated by another neural network, similar to the discriminator of a [GAN](https://arxiv.org/pdf/1406.2661.pdf). At small scale, ELECTRA achieves strong results even when trained on a single GPU. At large scale, ELECTRA achieves state-of-the-art results on the [SQuAD 2.0](https://rajpurkar.github.io/SQuAD-explorer/) dataset.
11 |
12 | For a detailed description and experimental results, please refer to our paper [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/pdf?id=r1xMH1BtvB).
13 |
14 | This repository contains code to pre-train ELECTRA, including small ELECTRA models on a single GPU. It also supports fine-tuning ELECTRA on downstream tasks including classification tasks (e.g., [GLUE](https://gluebenchmark.com/)), QA tasks (e.g., [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), and sequence tagging tasks (e.g., [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/)).
15 |
16 | ## How to use the generator in `transformers`
17 |
18 | ```python
19 | from transformers import pipeline
20 |
21 | fill_mask = pipeline(
22 | "fill-mask",
23 | model="google/electra-base-generator",
24 | tokenizer="google/electra-base-generator"
25 | )
26 |
27 | print(
28 | fill_mask(f"HuggingFace is creating a {fill_mask.tokenizer.mask_token} that the community uses to solve NLP tasks.")
29 | )
30 |
31 | ```
32 |
--------------------------------------------------------------------------------
/examples/token-classification/run_pl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Install the latest PyTorch Lightning from source.
4 | pip install -U git+https://github.com/PyTorchLightning/pytorch-lightning/
5 | # Install the example requirements (seqeval is needed for the metrics import).
6 | pip install -r ../requirements.txt
7 |
8 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \
9 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp
10 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \
11 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp
12 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \
13 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp
14 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py"
15 | export MAX_LENGTH=128
16 | export BERT_MODEL=bert-base-multilingual-cased
17 | python3 preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt
18 | python3 preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt
19 | python3 preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt
20 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$" | sort | uniq > labels.txt
21 | export BATCH_SIZE=32
22 | export NUM_EPOCHS=3
23 | export SEED=1
24 |
25 | export OUTPUT_DIR_NAME=germeval-model
26 | export CURRENT_DIR=${PWD}
27 | export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME}
28 | mkdir -p $OUTPUT_DIR
29 |
30 | # Add parent directory to python path to access lightning_base.py
31 | export PYTHONPATH="../":"${PYTHONPATH}"
32 |
33 | python3 run_pl_ner.py --data_dir ./ \
34 | --model_type bert \
35 | --labels ./labels.txt \
36 | --model_name_or_path $BERT_MODEL \
37 | --output_dir $OUTPUT_DIR \
38 | --max_seq_length $MAX_LENGTH \
39 | --num_train_epochs $NUM_EPOCHS \
40 | --train_batch_size $BATCH_SIZE \
41 | --seed $SEED \
42 | --do_train \
43 | --do_predict
--------------------------------------------------------------------------------
/model_cards/gaochangkuan/model_dir/README.md:
--------------------------------------------------------------------------------
1 | ## Generating Chinese poetry by topic.
2 |
3 | ```python
4 | from transformers import BertTokenizer, AutoModelWithLMHead
5 |
6 | tokenizer = BertTokenizer.from_pretrained("gaochangkuan/model_dir")
7 |
8 | model = AutoModelWithLMHead.from_pretrained("gaochangkuan/model_dir")
9 |
10 |
11 | prompt = '''田园躬耕'''  # poem topic: "tilling the fields in the countryside"
12 | 
13 | length = 84
14 | stop_token = ''
15 | 
16 | temperature = 1.2
17 | 
18 | repetition_penalty = 1.3
19 | 
20 | k = 30
21 | p = 0.95
22 | 
23 | device = 'cuda'
24 | seed = 2020
25 | no_cuda = False
26 |
27 | prompt_text = prompt if prompt else input("Model prompt >>> ")
28 |
29 | encoded_prompt = tokenizer.encode(
30 |     '<s>' + prompt_text + '</s>',
31 | add_special_tokens=False,
32 | return_tensors="pt"
33 | )
34 |
35 | encoded_prompt = encoded_prompt.to(device)
36 |
37 | output_sequences = model.generate(
38 | input_ids=encoded_prompt,
39 | max_length=length,
40 | min_length=10,
41 | do_sample=True,
42 | early_stopping=True,
43 | num_beams=10,
44 | temperature=temperature,
45 | top_k=k,
46 | top_p=p,
47 | repetition_penalty=repetition_penalty,
48 | bad_words_ids=None,
49 | bos_token_id=tokenizer.bos_token_id,
50 | pad_token_id=tokenizer.pad_token_id,
51 | eos_token_id=tokenizer.eos_token_id,
52 | length_penalty=1.2,
53 | no_repeat_ngram_size=2,
54 | num_return_sequences=1,
55 | attention_mask=None,
56 | decoder_start_token_id=tokenizer.bos_token_id,)
57 |
58 |
59 | generated_sequence = output_sequences[0].tolist()
60 | text = tokenizer.decode(generated_sequence)
61 |
62 |
63 | text = text[: text.find(stop_token) if stop_token else None]
64 |
65 | print(''.join(text).replace(' ', '').replace('<s>', '').replace('</s>', ''))
66 | ```
67 |
--------------------------------------------------------------------------------
/tests/test_tokenization_distilbert.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from transformers.tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
18 |
19 | from .test_tokenization_bert import BertTokenizationTest
20 | from .utils import slow
21 |
22 |
23 | class DistilBertTokenizationTest(BertTokenizationTest):
24 |
25 | tokenizer_class = DistilBertTokenizer
26 |
27 | def get_rust_tokenizer(self, **kwargs):
28 | return DistilBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
29 |
30 | @slow
31 | def test_sequence_builders(self):
32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
33 |
34 | text = tokenizer.encode("sequence builders", add_special_tokens=False)
35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
36 |
37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)
39 |
40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id]
41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [
42 | tokenizer.sep_token_id
43 | ]
44 |
--------------------------------------------------------------------------------
/model_cards/allegro/herbert-klej-cased-tokenizer-v1/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: polish
3 | ---
4 |
5 | # HerBERT tokenizer
6 |
7 | The **[HerBERT](https://en.wikipedia.org/wiki/Zbigniew_Herbert)** tokenizer is a character-level byte-pair encoding tokenizer with a
8 | vocabulary size of 50k tokens. It was trained on [Wolne Lektury](https://wolnelektury.pl/) and a publicly available subset of the
9 | [National Corpus of Polish](http://nkjp.pl/index.php?page=14&lang=0) with the [fastBPE](https://github.com/glample/fastBPE) library.
10 | The tokenizer uses the `XLMTokenizer` implementation from [transformers](https://github.com/huggingface/transformers) (see the tokenization sketch at the end of this card).
11 |
12 | ## Tokenizer usage
13 | The HerBERT tokenizer should be used together with the [HerBERT model](https://huggingface.co/allegro/herbert-klej-cased-v1):
14 | ```python
15 | from transformers import XLMTokenizer, RobertaModel
16 |
17 | tokenizer = XLMTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
18 | model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
19 |
20 | encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')  # "Whoever has better art has a better government – that is clear."
21 | outputs = model(encoded_input)
22 | ```
23 |
24 | ## License
25 | CC BY-SA 4.0
26 |
27 | ## Citation
28 | If you use this tokenizer, please cite the following paper:
29 | ```
30 | @misc{rybak2020klej,
31 | title={KLEJ: Comprehensive Benchmark for Polish Language Understanding},
32 | author={Piotr Rybak and Robert Mroczkowski and Janusz Tracz and Ireneusz Gawlik},
33 | year={2020},
34 | eprint={2005.00630},
35 | archivePrefix={arXiv},
36 | primaryClass={cs.CL}
37 | }
38 | ```
39 | The paper has been accepted at ACL 2020; we will update the BibTeX entry as soon as the proceedings appear.
40 |
41 | ## Authors
42 | The tokenizer was created by the **Allegro Machine Learning Research** team.
43 |
44 | You can contact us at: klejbenchmark@allegro.pl
45 |
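46 | ## Tokenization sketch
47 | 
48 | As a quick illustration of the byte-pair encoding described above (a sketch that is not part of the original card; the printed subword pieces depend on the trained vocabulary):
49 | 
50 | ```python
51 | from transformers import XLMTokenizer
52 | 
53 | tokenizer = XLMTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
54 | 
55 | # Inspect how a Polish sentence is split into BPE subword units.
56 | print(tokenizer.tokenize("Kto ma lepszą sztukę, ma lepszy rząd – to jasne."))
57 | ```
58 | 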
--------------------------------------------------------------------------------
/model_cards/nlptown/bert-base-multilingual-uncased-sentiment/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language:
3 | - english
4 | - dutch
5 | - german
6 | - french
7 | - italian
8 | - spanish
9 | ---
10 |
11 | # bert-base-multilingual-uncased-sentiment
12 |
13 | This is a bert-base-multilingual-uncased model finetuned for sentiment analysis on product reviews in six languages: English, Dutch, German, French, Spanish, and Italian. It predicts the sentiment of the review as a number of stars (between 1 and 5).
14 |
15 | This model is intended for direct use as a sentiment analysis model for product reviews in any of the six languages above, or for further finetuning on related sentiment analysis tasks (see the usage sketch at the end of this card).
16 |
17 | ## Training data
18 |
19 | Here are the numbers of product reviews we used for finetuning the model:
20 |
21 | | Language | Number of reviews |
22 | | -------- | ----------------- |
23 | | English | 150k |
24 | | Dutch | 80k |
25 | | German | 137k |
26 | | French | 140k |
27 | | Italian | 72k |
28 | | Spanish | 50k |
29 |
30 | ## Accuracy
31 |
32 | The finetuned model obtained the following accuracy on 5,000 held-out product reviews in each of the languages:
33 |
34 | - Accuracy (exact) is the exact match on the number of stars.
35 | - Accuracy (off-by-1) is the percentage of reviews where the number of stars the model predicts differs by a maximum of 1 from the number given by the human reviewer.
36 |
37 |
38 | | Language | Accuracy (exact) | Accuracy (off-by-1) |
39 | | -------- | ---------------- | ------------------- |
40 | | English  | 67%              | 95%                 |
41 | | Dutch    | 57%              | 93%                 |
42 | | German   | 61%              | 94%                 |
43 | | French   | 59%              | 94%                 |
44 | | Italian  | 59%              | 95%                 |
45 | | Spanish  | 58%              | 95%                 |
46 |
47 | ## Contact
48 |
49 | Contact [NLP Town](https://www.nlp.town) for questions, feedback and/or requests for similar models.
50 |
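51 | ## Quick usage sketch
52 | 
53 | The model can be tried out through the standard `transformers` sentiment-analysis pipeline. This is a minimal sketch rather than an official example; the exact label strings in the output (e.g. "5 stars") are an assumption and may differ between library versions:
54 | 
55 | ```python
56 | from transformers import pipeline
57 | 
58 | # Load the multilingual star-rating model through the generic sentiment-analysis pipeline.
59 | sentiment = pipeline(
60 |     "sentiment-analysis",
61 |     model="nlptown/bert-base-multilingual-uncased-sentiment",
62 |     tokenizer="nlptown/bert-base-multilingual-uncased-sentiment"
63 | )
64 | 
65 | # Each result contains a star label and a confidence score.
66 | print(sentiment("I love this product, it works perfectly."))
67 | print(sentiment("Ce produit est arrivé cassé et le service client ne répond pas."))
68 | ```
69 | 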
--------------------------------------------------------------------------------
/model_cards/monologg/koelectra-base-discriminator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: korean
3 | ---
4 |
5 | # KoELECTRA (Base Discriminator)
6 |
7 | Pretrained ELECTRA Language Model for Korean (`koelectra-base-discriminator`)
8 |
9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md).
10 |
11 | ## Usage
12 |
13 | ### Load model and tokenizer
14 |
15 | ```python
16 | >>> from transformers import ElectraModel, ElectraTokenizer
17 |
18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-base-discriminator")
19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator")
20 | ```
21 |
22 | ### Tokenizer example
23 |
24 | ```python
25 | >>> from transformers import ElectraTokenizer
26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator")
27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']
29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'])
30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3]
31 | ```
32 |
33 | ## Example using ElectraForPreTraining
34 |
35 | ```python
36 | import torch
37 | from transformers import ElectraForPreTraining, ElectraTokenizer
38 |
39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-base-discriminator")
40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-discriminator")
41 |
42 | sentence = "나는 방금 밥을 먹었다."  # "I just had a meal."
43 | fake_sentence = "나는 내일 밥을 먹었다."  # "I had a meal tomorrow." (deliberately inconsistent)
44 |
45 | fake_tokens = tokenizer.tokenize(fake_sentence)
46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt")
47 |
48 | discriminator_outputs = discriminator(fake_inputs)
49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2)  # 1 = token flagged as replaced ("fake")
50 | 
51 | print(list(zip(fake_tokens, predictions.squeeze().tolist()[1:-1])))
52 | ```
53 |
--------------------------------------------------------------------------------
/model_cards/monologg/koelectra-small-discriminator/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | language: korean
3 | ---
4 |
5 | # KoELECTRA (Small Discriminator)
6 |
7 | Pretrained ELECTRA Language Model for Korean (`koelectra-small-discriminator`)
8 |
9 | For more detail, please see [original repository](https://github.com/monologg/KoELECTRA/blob/master/README_EN.md).
10 |
11 | ## Usage
12 |
13 | ### Load model and tokenizer
14 |
15 | ```python
16 | >>> from transformers import ElectraModel, ElectraTokenizer
17 |
18 | >>> model = ElectraModel.from_pretrained("monologg/koelectra-small-discriminator")
19 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator")
20 | ```
21 |
22 | ### Tokenizer example
23 |
24 | ```python
25 | >>> from transformers import ElectraTokenizer
26 | >>> tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator")
27 | >>> tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")
28 | ['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]']
29 | >>> tokenizer.convert_tokens_to_ids(['[CLS]', '한국어', 'E', '##L', '##EC', '##T', '##RA', '##를', '공유', '##합니다', '.', '[SEP]'])
30 | [2, 18429, 41, 6240, 15229, 6204, 20894, 5689, 12622, 10690, 18, 3]
31 | ```
32 |
33 | ## Example using ElectraForPreTraining
34 |
35 | ```python
36 | import torch
37 | from transformers import ElectraForPreTraining, ElectraTokenizer
38 |
39 | discriminator = ElectraForPreTraining.from_pretrained("monologg/koelectra-small-discriminator")
40 | tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-discriminator")
41 |
42 | sentence = "나는 방금 밥을 먹었다."  # "I just had a meal."
43 | fake_sentence = "나는 내일 밥을 먹었다."  # "I had a meal tomorrow." (deliberately inconsistent)
44 |
45 | fake_tokens = tokenizer.tokenize(fake_sentence)
46 | fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt")
47 |
48 | discriminator_outputs = discriminator(fake_inputs)
49 | predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2)  # 1 = token flagged as replaced ("fake")
50 | 
51 | print(list(zip(fake_tokens, predictions.squeeze().tolist()[1:-1])))
52 | ```
53 |
--------------------------------------------------------------------------------