├── .circleci ├── config.yml └── deploy.sh ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── ---new-benchmark.md │ ├── --new-model-addition.md │ ├── bug-report.md │ ├── feature-request.md │ ├── migration.md │ └── question-help.md └── stale.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── deploy_multi_version_doc.sh ├── docker └── Dockerfile ├── docs ├── Makefile ├── README.md └── source │ ├── _static │ ├── css │ │ ├── Calibre-Light.ttf │ │ ├── Calibre-Medium.otf │ │ ├── Calibre-Regular.otf │ │ ├── Calibre-Thin.otf │ │ ├── code-snippets.css │ │ └── huggingface.css │ └── js │ │ ├── custom.js │ │ └── huggingface_logo.svg │ ├── benchmarks.md │ ├── bertology.rst │ ├── conf.py │ ├── converting_tensorflow_models.rst │ ├── examples.md │ ├── imgs │ ├── transformers_logo_name.png │ ├── warmup_constant_schedule.png │ ├── warmup_cosine_hard_restarts_schedule.png │ ├── warmup_cosine_schedule.png │ ├── warmup_cosine_warm_restarts_schedule.png │ └── warmup_linear_schedule.png │ ├── index.rst │ ├── installation.md │ ├── main_classes │ ├── configuration.rst │ ├── model.rst │ ├── optimizer_schedules.rst │ ├── processors.rst │ └── tokenizer.rst │ ├── migration.md │ ├── model_doc │ ├── albert.rst │ ├── auto.rst │ ├── bert.rst │ ├── camembert.rst │ ├── ctrl.rst │ ├── distilbert.rst │ ├── gpt.rst │ ├── gpt2.rst │ ├── roberta.rst │ ├── transformerxl.rst │ ├── xlm.rst │ └── xlnet.rst │ ├── model_sharing.md │ ├── multilingual.rst │ ├── notebooks.rst │ ├── pretrained_models.rst │ ├── quickstart.md │ ├── serialization.rst │ └── torchscript.rst ├── examples ├── README.md ├── benchmarks.py ├── contrib │ ├── README.md │ ├── run_camembert.py │ ├── run_openai_gpt.py │ ├── run_swag.py │ └── run_transfo_xl.py ├── distillation │ ├── README.md │ ├── distiller.py │ ├── grouped_batch_sampler.py │ ├── lm_seqs_dataset.py │ ├── requirements.txt │ ├── run_squad_w_distillation.py │ ├── scripts │ │ ├── binarized_data.py │ │ ├── extract.py │ │ ├── extract_distilbert.py │ │ └── token_counts.py │ ├── train.py │ ├── training_configs │ │ ├── distilbert-base-multilingual-cased.json │ │ ├── distilbert-base-uncased.json │ │ ├── distilgpt2.json │ │ └── distilroberta-base.json │ └── utils.py ├── mm-imdb │ ├── run_mmimdb.py │ └── utils_mmimdb.py ├── pplm │ ├── README.md │ ├── imgs │ │ ├── headfigure.png │ │ └── wooly.png │ ├── pplm_classification_head.py │ ├── run_pplm.py │ └── run_pplm_discrim_train.py ├── requirements.txt ├── run_bertology.py ├── run_generation.py ├── run_glue.py ├── run_lm_finetuning.py ├── run_multiple_choice.py ├── run_ner.py ├── run_squad.py ├── run_tf_glue.py ├── run_tf_ner.py ├── run_xnli.py ├── summarization │ ├── README.md │ ├── configuration_bertabs.py │ ├── convert_bertabs_original_pytorch_checkpoint.py │ ├── modeling_bertabs.py │ ├── requirements.txt │ ├── run_summarization.py │ ├── test_utils_summarization.py │ └── utils_summarization.py ├── test_examples.py ├── tests_samples │ ├── .gitignore │ ├── MRPC │ │ ├── dev.tsv │ │ └── train.tsv │ └── SQUAD │ │ ├── dev-v2.0.json │ │ └── train-v2.0.json ├── utils_multiple_choice.py └── utils_ner.py ├── hubconf.py ├── notebooks ├── Comparing-PT-and-TF-models.ipynb ├── Comparing-TF-and-PT-models-MLM-NSP.ipynb ├── Comparing-TF-and-PT-models-SQuAD.ipynb └── Comparing-TF-and-PT-models.ipynb ├── setup.cfg ├── setup.py ├── src └── transformers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── convert.py │ ├── download.py │ ├── run.py │ ├── serving.py │ ├── train.py │ └── user.py │ ├── configuration_albert.py │ ├── 
configuration_auto.py │ ├── configuration_bert.py │ ├── configuration_camembert.py │ ├── configuration_ctrl.py │ ├── configuration_distilbert.py │ ├── configuration_gpt2.py │ ├── configuration_mmbt.py │ ├── configuration_openai.py │ ├── configuration_roberta.py │ ├── configuration_t5.py │ ├── configuration_transfo_xl.py │ ├── configuration_utils.py │ ├── configuration_xlm.py │ ├── configuration_xlm_roberta.py │ ├── configuration_xlnet.py │ ├── convert_albert_original_tf_checkpoint_to_pytorch.py │ ├── convert_bert_original_tf_checkpoint_to_pytorch.py │ ├── convert_bert_pytorch_checkpoint_to_original_tf.py │ ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py │ ├── convert_openai_original_tf_checkpoint_to_pytorch.py │ ├── convert_pytorch_checkpoint_to_tf2.py │ ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py │ ├── convert_t5_original_tf_checkpoint_to_pytorch.py │ ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py │ ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py │ ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py │ ├── data │ ├── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ └── squad_metrics.py │ └── processors │ │ ├── __init__.py │ │ ├── glue.py │ │ ├── squad.py │ │ ├── utils.py │ │ └── xnli.py │ ├── file_utils.py │ ├── hf_api.py │ ├── modelcard.py │ ├── modeling_albert.py │ ├── modeling_auto.py │ ├── modeling_bert.py │ ├── modeling_camembert.py │ ├── modeling_ctrl.py │ ├── modeling_distilbert.py │ ├── modeling_encoder_decoder.py │ ├── modeling_gpt2.py │ ├── modeling_mmbt.py │ ├── modeling_openai.py │ ├── modeling_roberta.py │ ├── modeling_t5.py │ ├── modeling_tf_albert.py │ ├── modeling_tf_auto.py │ ├── modeling_tf_bert.py │ ├── modeling_tf_ctrl.py │ ├── modeling_tf_distilbert.py │ ├── modeling_tf_gpt2.py │ ├── modeling_tf_openai.py │ ├── modeling_tf_pytorch_utils.py │ ├── modeling_tf_roberta.py │ ├── modeling_tf_t5.py │ ├── modeling_tf_transfo_xl.py │ ├── modeling_tf_transfo_xl_utilities.py │ ├── modeling_tf_utils.py │ ├── modeling_tf_xlm.py │ ├── modeling_tf_xlnet.py │ ├── modeling_transfo_xl.py │ ├── modeling_transfo_xl_utilities.py │ ├── modeling_utils.py │ ├── modeling_xlm.py │ ├── modeling_xlm_roberta.py │ ├── modeling_xlnet.py │ ├── optimization.py │ ├── optimization_tf.py │ ├── pipelines.py │ ├── tokenization_albert.py │ ├── tokenization_auto.py │ ├── tokenization_bert.py │ ├── tokenization_bert_japanese.py │ ├── tokenization_camembert.py │ ├── tokenization_ctrl.py │ ├── tokenization_distilbert.py │ ├── tokenization_gpt2.py │ ├── tokenization_openai.py │ ├── tokenization_roberta.py │ ├── tokenization_t5.py │ ├── tokenization_transfo_xl.py │ ├── tokenization_utils.py │ ├── tokenization_xlm.py │ ├── tokenization_xlm_roberta.py │ └── tokenization_xlnet.py ├── templates ├── adding_a_new_example_script │ ├── README.md │ ├── run_xxx.py │ └── utils_xxx.py └── adding_a_new_model │ ├── README.md │ ├── configuration_xxx.py │ ├── convert_xxx_original_tf_checkpoint_to_pytorch.py │ ├── modeling_tf_xxx.py │ ├── modeling_xxx.py │ ├── tests │ ├── test_modeling_tf_xxx.py │ ├── test_modeling_xxx.py │ └── test_tokenization_xxx.py │ └── tokenization_xxx.py ├── tests ├── __init__.py ├── fixtures │ ├── empty.txt │ ├── input.txt │ ├── sample_text.txt │ ├── spiece.model │ └── test_sentencepiece.model ├── test_configuration_common.py ├── test_hf_api.py ├── test_model_card.py ├── test_modeling_albert.py ├── test_modeling_auto.py ├── test_modeling_bert.py ├── test_modeling_common.py ├── test_modeling_ctrl.py ├── test_modeling_distilbert.py ├── 
test_modeling_encoder_decoder.py ├── test_modeling_gpt2.py ├── test_modeling_openai.py ├── test_modeling_roberta.py ├── test_modeling_t5.py ├── test_modeling_tf_albert.py ├── test_modeling_tf_auto.py ├── test_modeling_tf_bert.py ├── test_modeling_tf_common.py ├── test_modeling_tf_ctrl.py ├── test_modeling_tf_distilbert.py ├── test_modeling_tf_gpt2.py ├── test_modeling_tf_openai_gpt.py ├── test_modeling_tf_roberta.py ├── test_modeling_tf_t5.py ├── test_modeling_tf_transfo_xl.py ├── test_modeling_tf_xlm.py ├── test_modeling_tf_xlnet.py ├── test_modeling_transfo_xl.py ├── test_modeling_xlm.py ├── test_modeling_xlnet.py ├── test_optimization.py ├── test_optimization_tf.py ├── test_pipelines.py ├── test_tokenization_albert.py ├── test_tokenization_auto.py ├── test_tokenization_bert.py ├── test_tokenization_bert_japanese.py ├── test_tokenization_common.py ├── test_tokenization_ctrl.py ├── test_tokenization_distilbert.py ├── test_tokenization_gpt2.py ├── test_tokenization_openai.py ├── test_tokenization_roberta.py ├── test_tokenization_t5.py ├── test_tokenization_transfo_xl.py ├── test_tokenization_utils.py ├── test_tokenization_xlm.py ├── test_tokenization_xlnet.py └── utils.py ├── transformers-cli ├── utils ├── download_glue_data.py └── link_tester.py └── valohai.yaml /.circleci/deploy.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ ! -z "$2" ] 7 | then 8 | if [ -d "$dir/$2" ]; then 9 | echo "Directory" $2 "already exists" 10 | else 11 | echo "Pushing version" $2 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 13 | fi 14 | else 15 | echo "Pushing master" 16 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 17 | fi 18 | } 19 | 20 | deploy_doc "master" 21 | deploy_doc "b33a385" v1.0.0 22 | deploy_doc "fe02e45" v1.1.0 23 | deploy_doc "89fd345" v1.2.0 24 | deploy_doc "fc9faa8" v2.0.0 25 | deploy_doc "3ddce1d" v2.1.1 26 | deploy_doc "3616209" v2.2.0 27 | deploy_doc "d0f8b9a" v2.3.0 28 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip conversion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---new-benchmark.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F5A5 New Benchmark" 3 | about: You benchmarked a part of this library and would like to share your results 4 | title: "[Benchmark]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Benchmarking Transformers 11 | 12 | ## Benchmark 13 | 14 | Which part of Transformers did you benchmark? 15 | 16 | ## Set-up 17 | 18 | What did you run your benchmarks on? Please include details such as the CPU or GPU used and, if using multiple GPUs, which parallelization you used. 19 | 20 | ## Results 21 | 22 | Put your results here! 
23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--new-model-addition.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31FNew model addition" 3 | about: Submit a proposal/request to implement a new Transformer-based model 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟New model addition 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open Source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them) 21 | 22 | ## Additional context 23 | 24 | 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve PyTorch Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🐛 Bug 11 | 12 | 13 | 14 | Model I am using (Bert, XLNet....): 15 | 16 | Language I am using the model on (English, Chinese....): 17 | 18 | The problem arises when using: 19 | * [ ] the official example scripts: (give details) 20 | * [ ] my own modified scripts: (give details) 21 | 22 | The tasks I am working on are: 23 | * [ ] an official GLUE/SQuAD task: (give the name) 24 | * [ ] my own task or dataset: (give details) 25 | 26 | ## To Reproduce 27 | 28 | Steps to reproduce the behavior: 29 | 30 | 1. 31 | 2. 32 | 3. 33 | 34 | 35 | 36 | ## Expected behavior 37 | 38 | 39 | 40 | ## Environment 41 | 42 | * OS: 43 | * Python version: 44 | * PyTorch version: 45 | * PyTorch Transformers version (or branch): 46 | * Using GPU ? 47 | * Distributed or parallel setup ? 48 | * Any other relevant information: 49 | 50 | ## Additional context 51 | 52 | 53 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature Request" 3 | about: Submit a proposal/request for a new PyTorch Transformers feature 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | 14 | ## Motivation 15 | 16 | 17 | 18 | ## Additional context 19 | 20 | 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/migration.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Migration from PyTorch-pretrained-Bert" 3 | about: Report a problem when migrating from PyTorch-pretrained-Bert to Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 📚 Migration 11 | 12 | 13 | 14 | Model I am using (Bert, XLNet....): 15 | 16 | Language I am using the model on (English, Chinese....): 17 | 18 | The problem arises when using: 19 | * [ ] the official example scripts: (give details) 20 | * [ ] my own modified scripts: (give details) 21 | 22 | The tasks I am working on are: 23 | * [ ] an official GLUE/SQuAD task: (give the name) 24 | * [ ] my own task or dataset: (give details) 25 | 26 | Details of the issue: 27 | 28 | 29 | 30 | ## Environment 31 | 32 | * OS: 33 | * Python version: 34 | * PyTorch version: 35 | * PyTorch Transformers version (or branch): 36 | * Using GPU ? 
37 | * Distributed or parallel setup ? 38 | * Any other relevant information: 39 | 40 | ## Checklist 41 | 42 | - [ ] I have read the migration guide in the readme. 43 | - [ ] I checked if a related official extension example runs on my machine. 44 | 45 | ## Additional context 46 | 47 | 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Questions & Help" 3 | about: Start a general discussion related to PyTorch Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❓ Questions & Help 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | 118 | # vscode 119 | .vscode 120 | 121 | # Pycharm 122 | .idea 123 | 124 | # TF code 125 | tensorflow_code 126 | 127 | # Models 128 | models 129 | proc_data 130 | 131 | # examples 132 | runs 133 | examples/runs 134 | 135 | # data 136 | /data 137 | serialization_dir 138 | 139 | # emacs 140 | *.*~ 141 | debug.env 142 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test test-examples 2 | 3 | # Check that source code meets quality standards 4 | 5 | quality: 6 | black --check --line-length 119 --target-version py35 examples templates tests src utils 7 | isort --check-only --recursive examples templates tests src utils 8 | flake8 examples templates tests src utils 9 | 10 | # Format source code automatically 11 | 12 | style: 13 | black --line-length 119 --target-version py35 examples templates tests src utils 14 | isort --recursive examples templates tests src utils 15 | 16 | # Run tests for the library 17 | 18 | test: 19 | python -m pytest -n auto --dist=loadfile -s -v ./tests/ 20 | 21 | # Run tests for examples 22 | 23 | test-examples: 24 | python -m pytest -n auto --dist=loadfile -s -v ./examples/ 25 | -------------------------------------------------------------------------------- /deploy_multi_version_doc.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ ! 
-z "$2" ] 7 | then 8 | echo "Pushing version" $2 9 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 10 | else 11 | echo "Pushing master" 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 13 | fi 14 | } 15 | 16 | deploy_doc "master" 17 | deploy_doc "b33a385" v1.0.0 18 | deploy_doc "fe02e45" v1.1.0 19 | deploy_doc "89fd345" v1.2.0 20 | deploy_doc "fc9faa8" v2.0.0 21 | deploy_doc "3ddce1d" v2.1.1 22 | deploy_doc "f2f3294" v2.2.0 23 | deploy_doc "d0f8b9a" v2.3.0 24 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:latest 2 | 3 | RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext 4 | 5 | RUN pip install transformers 6 | 7 | WORKDIR /workspace -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Generating the documentation 2 | 3 | To generate the documentation, you first have to build it. Several packages are necessary to build the doc, 4 | you can install them with the following command, at the root of the code repository: 5 | 6 | ```bash 7 | pip install -e ".[docs]" 8 | ``` 9 | 10 | ## Packages installed 11 | 12 | Here's an overview of all the packages installed. If you ran the previous command installing all packages from 13 | `requirements.txt`, you do not need to run the following commands. 14 | 15 | Building it requires the package `sphinx` that you can 16 | install using: 17 | 18 | ```bash 19 | pip install -U sphinx 20 | ``` 21 | 22 | You would also need the custom installed [theme](https://github.com/readthedocs/sphinx_rtd_theme) by 23 | [Read The Docs](https://readthedocs.org/). You can install it using the following command: 24 | 25 | ```bash 26 | pip install sphinx_rtd_theme 27 | ``` 28 | 29 | The third necessary package is the `recommonmark` package to accept Markdown as well as Restructured text: 30 | 31 | ```bash 32 | pip install recommonmark 33 | ``` 34 | 35 | ## Building the documentation 36 | 37 | Make sure that there is a symlink from the `example` file (in /examples) inside the source folder. 
Run the following 38 | command to generate it: 39 | 40 | ```bash 41 | ln -s ../../examples/README.md examples.md 42 | ``` 43 | 44 | Once you have set up `sphinx`, you can build the documentation by running the following command in the `/docs` folder: 45 | 46 | ```bash 47 | make html 48 | ``` 49 | 50 | --- 51 | **NOTE** 52 | 53 | If you are adding/removing elements from the toc-tree or from any structural item, it is recommended to clean the build 54 | directory before rebuilding. Run the following command to clean and build: 55 | 56 | ```bash 57 | make clean && make html 58 | ``` 59 | 60 | --- 61 | 62 | It should build the static app that will be available under `/docs/_build/html`. 63 | 64 | ## Adding a new element to the tree (toc-tree) 65 | 66 | Accepted files are reStructuredText (.rst) and Markdown (.md). Create a file with its extension and put it 67 | in the source directory. You can then link it to the toc-tree by putting the filename without the extension. 68 | -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } -------------------------------------------------------------------------------- /docs/source/benchmarks.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | This section is dedicated to benchmarks of the library, run by maintainers, contributors, and users. These 4 | benchmarks will help keep track of the performance improvements brought to our models across versions. 
5 | 6 | ## Benchmarking all models for inference 7 | 8 | As of version 2.1, we have benchmarked all models for inference, across many different settings: using PyTorch, with 9 | and without TorchScript, using TensorFlow, with and without XLA. All of those tests were done across CPUs (except for 10 | TensorFlow XLA) and GPUs. 11 | 12 | The approach is detailed in the [following blogpost](https://medium.com/huggingface/benchmarking-transformers-pytorch-and-tensorflow-e2917fb891c2) 13 | 14 | The results are available [here](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing). 15 | 16 | ## TF2 with mixed precision, XLA, Distribution (@tlkh) 17 | 18 | This work was done by [Timothy Liu](https://github.com/tlkh). 19 | 20 | There are very positive results to be gained from the various TensorFlow 2.0 features: 21 | 22 | - Automatic Mixed Precision (AMP) 23 | - XLA compiler 24 | - Distribution strategies (multi-GPU) 25 | 26 | The benefits are listed here (tested on CoLA, MRPC, SST-2): 27 | 28 | - AMP: Between 1.4x and 1.6x decrease in overall time without change in batch size 29 | - AMP+XLA: Up to 2.5x decrease in overall time on SST-2 (larger dataset) 30 | - Distribution: Between 1.4x and 3.4x decrease in overall time on 4xV100 31 | - Combined: Up to 5.7x decrease in overall training time, or 9.1x training throughput 32 | 33 | The model quality (measured by the validation accuracy) fluctuates slightly. Taking an average of 4 training runs 34 | on a single GPU gives the following results: 35 | 36 | - CoLA: AMP results in slightly lower acc (0.820 vs 0.824) 37 | - MRPC: AMP results in lower acc (0.823 vs 0.835) 38 | - SST-2: AMP results in slightly lower acc (0.918 vs 0.922) 39 | 40 | However, in a distributed setting with 4xV100 (4x batch size), AMP can yield better results: 41 | 42 | - CoLA: AMP results in higher acc (0.828 vs 0.812) 43 | - MRPC: AMP results in lower acc (0.817 vs 0.827) 44 | - SST-2: AMP results in slightly lower acc (0.926 vs 0.929) 45 | 46 | The benchmark script is available [here](https://github.com/NVAITC/benchmarking/blob/master/tf2/bert_dist.py). 47 | 48 | Note: on some tasks (e.g. MRPC), the dataset is too small. The overhead due to the model compilation with XLA as well 49 | as the distribution strategy setup does not speed things up. The XLA compile time is also the reason why, although throughput 50 | can increase a lot (e.g. 2.7x for a single GPU), the overall (end-to-end) training speed-up is not as large (as low as 1.4x). 51 | 52 | The benefit seen on SST-2 (the larger dataset) is much clearer. 53 | 54 | All results can be seen on this [Google Sheet](https://docs.google.com/spreadsheets/d/1538MN224EzjbRL239sqSiUy6YY-rAjHyXhTzz_Zptls/edit#gid=960868445). 55 | 
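As a rough illustration of the kind of measurement these inference benchmarks perform, here is a minimal PyTorch timing sketch. The checkpoint, input, and number of runs are arbitrary choices for the example, not the settings used in the published results:

```python
import time

import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")
model.eval()

input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])

with torch.no_grad():
    model(input_ids)  # warm-up pass, excluded from the measurement
    start = time.time()
    for _ in range(10):
        model(input_ids)
print("Average latency: {:.1f} ms".format((time.time() - start) * 1000 / 10))
```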
-------------------------------------------------------------------------------- /docs/source/bertology.rst: -------------------------------------------------------------------------------- 1 | BERTology 2 | --------- 3 | 4 | There is a growing field of study concerned with investigating the inner workings of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are: 5 | 6 | 7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950 8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650 9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341 10 | 11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650): 12 | 13 | 14 | * accessing all the hidden-states of BERT/GPT/GPT-2, 15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2, 16 | * retrieving head output values and gradients to be able to compute head importance scores and prune heads as explained in https://arxiv.org/abs/1905.10650. 17 | 18 | To help you understand and use these features, we have added a specific example script: `run_bertology.py <https://github.com/huggingface/transformers/blob/master/examples/run_bertology.py>`_, which extracts information from and prunes a model pre-trained on GLUE. 19 | 
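A minimal sketch of accessing the hidden-states and attention weights, assuming the v2.x configuration flags ``output_hidden_states`` and ``output_attentions`` (the checkpoint and input sentence are arbitrary choices for the example):

.. code-block::

    import torch
    from transformers import BertConfig, BertModel, BertTokenizer

    # Ask the model to return all hidden-states and all attention weights
    config = BertConfig.from_pretrained("bert-base-uncased", output_hidden_states=True, output_attentions=True)
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertModel.from_pretrained("bert-base-uncased", config=config)

    input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])
    outputs = model(input_ids)

    # The output tuple is (last hidden-state, pooled output, all hidden-states, all attentions)
    hidden_states, attentions = outputs[2], outputs[3]
    print(len(hidden_states))   # embedding output plus one hidden-state per layer
    print(attentions[0].shape)  # (batch_size, num_heads, seq_len, seq_len)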
-------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Transformers is tested on Python 3.5+ and PyTorch 1.1.0. 4 | 5 | ## With pip 6 | 7 | Transformers can be installed using pip as follows: 8 | 9 | ``` bash 10 | pip install transformers 11 | ``` 12 | 13 | ## From source 14 | 15 | To install from source, clone the repository and install with: 16 | 17 | ``` bash 18 | git clone https://github.com/huggingface/transformers.git 19 | cd transformers 20 | pip install . 21 | ``` 22 | 23 | ## Tests 24 | 25 | An extensive test suite is included to test the library behavior and several examples. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples). 26 | 27 | Refer to the [contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md#tests) for details about running tests. 28 | 29 | ## OpenAI GPT original tokenization workflow 30 | 31 | If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` and `SpaCy`: 32 | 33 | ``` bash 34 | pip install spacy ftfy==4.4.3 35 | python -m spacy download en 36 | ``` 37 | 38 | If you don't install `ftfy` and `SpaCy`, the `OpenAI GPT` tokenizer will default to tokenizing using BERT's `BasicTokenizer` followed by Byte-Pair Encoding (which should be fine for most usage, don't worry). 39 | 40 | ## Note on model downloads (Continuous Integration or large-scale deployments) 41 | 42 | If you expect to be downloading large volumes of models (more than 1,000) from our hosted bucket (for instance through your CI setup, or a large-scale production deployment), please cache the model files on your end. It will be way faster, and cheaper. Feel free to contact us privately if you need any help. 43 | 44 | ## Do you want to run a Transformer model on a mobile device? 45 | 46 | You should check out our [swift-coreml-transformers](https://github.com/huggingface/swift-coreml-transformers) repo. 47 | 48 | It contains a set of tools to convert PyTorch or TensorFlow 2.0 trained Transformer models (currently contains `GPT-2`, `DistilGPT-2`, `BERT`, and `DistilBERT`) to CoreML models that run on iOS devices. 49 | 50 | At some point in the future, you'll be able to seamlessly move from pre-training or fine-tuning models in PyTorch to productizing them in CoreML, 51 | or prototype a model or an app in CoreML then research its hyperparameters or architecture from PyTorch. Super exciting! 52 | -------------------------------------------------------------------------------- /docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ---------------------------------------------------- 3 | 4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PretrainedConfig`` 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | .. autoclass:: transformers.PretrainedConfig 10 | :members: 11 | 
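For instance, a configuration can be downloaded, tweaked, saved locally, and reloaded; a minimal sketch (the checkpoint name and save directory are arbitrary choices for the example):

.. code-block::

    import os

    from transformers import BertConfig

    # Download the configuration from the hosted bucket and cache it
    config = BertConfig.from_pretrained("bert-base-uncased")
    config.output_attentions = True  # tweak an attribute

    # Save the configuration locally, then reload it from the directory
    os.makedirs("./saved_config/", exist_ok=True)
    config.save_pretrained("./saved_config/")
    config = BertConfig.from_pretrained("./saved_config/")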
-------------------------------------------------------------------------------- /docs/source/main_classes/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to: 7 | 8 | - resize the input token embeddings when new tokens are added to the vocabulary 9 | - prune the attention heads of the model. 10 | 11 | ``PreTrainedModel`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.PreTrainedModel 15 | :members: 16 | 17 | ``TFPreTrainedModel`` 18 | ~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autoclass:: transformers.TFPreTrainedModel 21 | :members: 22 | -------------------------------------------------------------------------------- /docs/source/main_classes/optimizer_schedules.rst: -------------------------------------------------------------------------------- 1 | Optimizer 2 | ---------------------------------------------------- 3 | 4 | The ``.optimization`` module provides: 5 | 6 | - an optimizer with fixed weight decay that can be used to fine-tune models, 7 | - several schedules in the form of schedule objects that inherit from ``_LRSchedule``, and 8 | - a gradient accumulation class to accumulate the gradients of multiple batches 9 | 10 | ``AdamW`` 11 | ~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: transformers.AdamW 14 | :members: 15 | 16 | ``AdamWeightDecay`` 17 | ~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: transformers.AdamWeightDecay 20 | :members: 21 | 22 | .. autofunction:: transformers.create_optimizer 23 | 24 | 25 | Schedules 26 | ---------------------------------------------------- 27 | 28 | Learning Rate Schedules 29 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | .. autofunction:: transformers.get_constant_schedule 32 | 33 | 34 | .. autofunction:: transformers.get_constant_schedule_with_warmup 35 | 36 | .. image:: /imgs/warmup_constant_schedule.png 37 | :target: /imgs/warmup_constant_schedule.png 38 | :alt: 39 | 40 | 41 | .. autofunction:: transformers.get_cosine_schedule_with_warmup 42 | 43 | 44 | .. image:: /imgs/warmup_cosine_schedule.png 45 | :target: /imgs/warmup_cosine_schedule.png 46 | :alt: 47 | 48 | 49 | .. autofunction:: transformers.get_cosine_with_hard_restarts_schedule_with_warmup 50 | 51 | .. image:: /imgs/warmup_cosine_hard_restarts_schedule.png 52 | :target: /imgs/warmup_cosine_hard_restarts_schedule.png 53 | :alt: 54 | 55 | 56 | 57 | .. autofunction:: transformers.get_linear_schedule_with_warmup 58 | 59 | .. image:: /imgs/warmup_linear_schedule.png 60 | :target: /imgs/warmup_linear_schedule.png 61 | :alt: 62 | 63 | ``Warmup`` 64 | ~~~~~~~~~~~~~~~~ 65 | 66 | .. autoclass:: transformers.Warmup 67 | :members: 68 | 69 | Gradient Strategies 70 | ---------------------------------------------------- 71 | 72 | ``GradientAccumulator`` 73 | ~~~~~~~~~~~~~~~~~~~~~~~ 74 | 75 | .. autoclass:: transformers.GradientAccumulator 76 | 
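Typical usage pairs the optimizer with one of the schedules above; a minimal sketch (the model, learning rate, and step counts are arbitrary placeholders):

.. code-block::

    from transformers import AdamW, BertModel, get_linear_schedule_with_warmup

    model = BertModel.from_pretrained("bert-base-uncased")
    optimizer = AdamW(model.parameters(), lr=2e-5)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=1000)

    # Inside the training loop, after loss.backward():
    optimizer.step()       # update the weights with the fixed-weight-decay AdamW
    scheduler.step()       # advance the warmup/decay learning-rate schedule
    optimizer.zero_grad()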
-------------------------------------------------------------------------------- /docs/source/main_classes/tokenizer.rst: -------------------------------------------------------------------------------- 1 | Tokenizer 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedTokenizer`` implements the common methods for loading/saving a tokenizer either from a local file or directory, or from a pretrained tokenizer provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedTokenizer`` is the main entry point into tokenizers as it also implements the main methods for using all the tokenizers: 7 | 8 | - tokenizing, converting tokens to ids and back, and encoding/decoding, 9 | - adding new tokens to the vocabulary in a way that is independent of the underlying structure (BPE, SentencePiece...), 10 | - managing special tokens (adding them, assigning them to roles, making sure they are not split during tokenization) 11 | 12 | ``PreTrainedTokenizer`` 13 | ~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.PreTrainedTokenizer 16 | :members: 17 | -------------------------------------------------------------------------------- /docs/source/model_doc/albert.rst: -------------------------------------------------------------------------------- 1 | ALBERT 2 | ---------------------------------------------------- 3 | 4 | ``AlbertConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.AlbertConfig 8 | :members: 9 | 10 | 11 | ``AlbertTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.AlbertTokenizer 15 | :members: 16 | 17 | 18 | ``AlbertModel`` 19 | ~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.AlbertModel 22 | :members: 23 | 24 | 25 | ``AlbertForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.AlbertForMaskedLM 29 | :members: 30 | 31 | 32 | ``AlbertForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.AlbertForSequenceClassification 36 | :members: 37 | 38 | 39 | ``AlbertForQuestionAnswering`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.AlbertForQuestionAnswering 43 | :members: 44 | 45 | 46 | ``TFAlbertModel`` 47 | ~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFAlbertModel 50 | :members: 51 | 52 | 53 | ``TFAlbertForMaskedLM`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFAlbertForMaskedLM 57 | :members: 58 | 59 | 60 | ``TFAlbertForSequenceClassification`` 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autoclass:: transformers.TFAlbertForSequenceClassification 64 | :members: 65 | -------------------------------------------------------------------------------- /docs/source/model_doc/auto.rst: -------------------------------------------------------------------------------- 1 | AutoModels 2 | ----------- 3 | 4 | In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you are supplying to the ``from_pretrained`` method. 5 | 6 | AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path to the pretrained weights/config/vocabulary. 7 | 8 | Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create an instance of the relevant architecture (e.g. ``model = AutoModel.from_pretrained('bert-base-cased')`` will create an instance of ``BertModel``), as in the sketch below. 9 | 10 | 
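A minimal sketch of this pattern (the checkpoint name and input sentence are arbitrary choices for the example):

.. code-block::

    from transformers import AutoModel, AutoTokenizer

    # The "bert-base-cased" name is enough for the Auto classes to pick the BERT architecture
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    model = AutoModel.from_pretrained("bert-base-cased")

    input_ids = tokenizer.encode("Hello world!", return_tensors="pt")
    outputs = model(input_ids)  # a tuple; outputs[0] is the last hidden-state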
11 | ``AutoConfig`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.AutoConfig 15 | :members: 16 | 17 | 18 | ``AutoModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.AutoModel 22 | :members: 23 | 24 | 25 | ``AutoTokenizer`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.AutoTokenizer 29 | :members: 30 | -------------------------------------------------------------------------------- /docs/source/model_doc/bert.rst: -------------------------------------------------------------------------------- 1 | BERT 2 | ---------------------------------------------------- 3 | 4 | ``BertConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.BertConfig 8 | :members: 9 | 10 | 11 | ``BertTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.BertTokenizer 15 | :members: 16 | 17 | 18 | ``BertModel`` 19 | ~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.BertModel 22 | :members: 23 | 24 | 25 | ``BertForPreTraining`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.BertForPreTraining 29 | :members: 30 | 31 | 32 | ``BertForMaskedLM`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.BertForMaskedLM 36 | :members: 37 | 38 | 39 | ``BertForNextSentencePrediction`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.BertForNextSentencePrediction 43 | :members: 44 | 45 | 46 | ``BertForSequenceClassification`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.BertForSequenceClassification 50 | :members: 51 | 52 | 53 | ``BertForMultipleChoice`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.BertForMultipleChoice 57 | :members: 58 | 59 | 60 | ``BertForTokenClassification`` 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autoclass:: transformers.BertForTokenClassification 64 | :members: 65 | 66 | 67 | ``BertForQuestionAnswering`` 68 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | .. autoclass:: transformers.BertForQuestionAnswering 71 | :members: 72 | 73 | 74 | ``TFBertModel`` 75 | ~~~~~~~~~~~~~~~~~~~~ 76 | 77 | .. autoclass:: transformers.TFBertModel 78 | :members: 79 | 80 | 81 | ``TFBertForPreTraining`` 82 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 83 | 84 | .. autoclass:: transformers.TFBertForPreTraining 85 | :members: 86 | 87 | 88 | ``TFBertForMaskedLM`` 89 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 90 | 91 | .. autoclass:: transformers.TFBertForMaskedLM 92 | :members: 93 | 94 | 95 | ``TFBertForNextSentencePrediction`` 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | 98 | .. autoclass:: transformers.TFBertForNextSentencePrediction 99 | :members: 100 | 101 | 102 | ``TFBertForSequenceClassification`` 103 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 104 | 105 | .. autoclass:: transformers.TFBertForSequenceClassification 106 | :members: 107 | 108 | 109 | ``TFBertForMultipleChoice`` 110 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 111 | 112 | .. 
autoclass:: transformers.TFBertForMultipleChoice 113 | :members: 114 | 115 | 116 | ``TFBertForTokenClassification`` 117 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 118 | 119 | .. autoclass:: transformers.TFBertForTokenClassification 120 | :members: 121 | 122 | 123 | ``TFBertForQuestionAnswering`` 124 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 125 | 126 | .. autoclass:: transformers.TFBertForQuestionAnswering 127 | :members: 128 | 129 | -------------------------------------------------------------------------------- /docs/source/model_doc/camembert.rst: -------------------------------------------------------------------------------- 1 | CamemBERT 2 | ---------------------------------------------------- 3 | 4 | ``CamembertConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.CamembertConfig 8 | :members: 9 | 10 | 11 | ``CamembertTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.CamembertTokenizer 15 | :members: 16 | 17 | 18 | ``CamembertModel`` 19 | ~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.CamembertModel 22 | :members: 23 | 24 | 25 | ``CamembertForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.CamembertForMaskedLM 29 | :members: 30 | 31 | 32 | ``CamembertForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.CamembertForSequenceClassification 36 | :members: 37 | 38 | 39 | ``CamembertForMultipleChoice`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.CamembertForMultipleChoice 43 | :members: 44 | 45 | 46 | ``CamembertForTokenClassification`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.CamembertForTokenClassification 50 | :members: 51 | -------------------------------------------------------------------------------- /docs/source/model_doc/ctrl.rst: -------------------------------------------------------------------------------- 1 | CTRL 2 | ---------------------------------------------------- 3 | 4 | Note: if you fine-tune a CTRL model using the Salesforce code (https://github.com/salesforce/ctrl), 5 | you'll be able to convert from TF to our HuggingFace/Transformers format using the 6 | ``convert_tf_to_huggingface_pytorch.py`` script (see `issue #1654 <https://github.com/huggingface/transformers/issues/1654>`_). 7 | 8 | 9 | ``CTRLConfig`` 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | .. autoclass:: transformers.CTRLConfig 13 | :members: 14 | 15 | 16 | ``CTRLTokenizer`` 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: transformers.CTRLTokenizer 20 | :members: 21 | 22 | 23 | ``CTRLModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.CTRLModel 27 | :members: 28 | 29 | 30 | ``CTRLLMHeadModel`` 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | .. autoclass:: transformers.CTRLLMHeadModel 34 | :members: 35 | 36 | 37 | ``TFCTRLModel`` 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: transformers.TFCTRLModel 41 | :members: 42 | 43 | 44 | ``TFCTRLLMHeadModel`` 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. autoclass:: transformers.TFCTRLLMHeadModel 48 | :members: 49 | 50 | -------------------------------------------------------------------------------- /docs/source/model_doc/distilbert.rst: -------------------------------------------------------------------------------- 1 | DistilBERT 2 | ---------------------------------------------------- 3 | 4 | ``DistilBertConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. 
autoclass:: transformers.DistilBertConfig 8 | :members: 9 | 10 | 11 | ``DistilBertTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.DistilBertTokenizer 15 | :members: 16 | 17 | 18 | ``DistilBertModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.DistilBertModel 22 | :members: 23 | 24 | 25 | ``DistilBertForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.DistilBertForMaskedLM 29 | :members: 30 | 31 | 32 | ``DistilBertForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.DistilBertForSequenceClassification 36 | :members: 37 | 38 | 39 | ``DistilBertForQuestionAnswering`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.DistilBertForQuestionAnswering 43 | :members: 44 | 45 | ``TFDistilBertModel`` 46 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | .. autoclass:: transformers.TFDistilBertModel 49 | :members: 50 | 51 | 52 | ``TFDistilBertForMaskedLM`` 53 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | 55 | .. autoclass:: transformers.TFDistilBertForMaskedLM 56 | :members: 57 | 58 | 59 | ``TFDistilBertForSequenceClassification`` 60 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 61 | 62 | .. autoclass:: transformers.TFDistilBertForSequenceClassification 63 | :members: 64 | 65 | 66 | ``TFDistilBertForQuestionAnswering`` 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | .. autoclass:: transformers.TFDistilBertForQuestionAnswering 70 | :members: 71 | -------------------------------------------------------------------------------- /docs/source/model_doc/gpt.rst: -------------------------------------------------------------------------------- 1 | OpenAI GPT 2 | ---------------------------------------------------- 3 | 4 | ``OpenAIGPTConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.OpenAIGPTConfig 8 | :members: 9 | 10 | 11 | ``OpenAIGPTTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.OpenAIGPTTokenizer 15 | :members: 16 | 17 | 18 | ``OpenAIGPTModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.OpenAIGPTModel 22 | :members: 23 | 24 | 25 | ``OpenAIGPTLMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.OpenAIGPTLMHeadModel 29 | :members: 30 | 31 | 32 | ``OpenAIGPTDoubleHeadsModel`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.OpenAIGPTDoubleHeadsModel 36 | :members: 37 | 38 | 39 | ``TFOpenAIGPTModel`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFOpenAIGPTModel 43 | :members: 44 | 45 | 46 | ``TFOpenAIGPTLMHeadModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFOpenAIGPTLMHeadModel 50 | :members: 51 | 52 | 53 | ``TFOpenAIGPTDoubleHeadsModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFOpenAIGPTDoubleHeadsModel 57 | :members: 58 | -------------------------------------------------------------------------------- /docs/source/model_doc/gpt2.rst: -------------------------------------------------------------------------------- 1 | OpenAI GPT2 2 | ---------------------------------------------------- 3 | 4 | ``GPT2Config`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.GPT2Config 8 | :members: 9 | 10 | 11 | ``GPT2Tokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. 
autoclass:: transformers.GPT2Tokenizer 15 | :members: 16 | 17 | 18 | ``GPT2Model`` 19 | ~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.GPT2Model 22 | :members: 23 | 24 | 25 | ``GPT2LMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.GPT2LMHeadModel 29 | :members: 30 | 31 | 32 | ``GPT2DoubleHeadsModel`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.GPT2DoubleHeadsModel 36 | :members: 37 | 38 | 39 | ``TFGPT2Model`` 40 | ~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFGPT2Model 43 | :members: 44 | 45 | 46 | ``TFGPT2LMHeadModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFGPT2LMHeadModel 50 | :members: 51 | 52 | 53 | ``TFGPT2DoubleHeadsModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFGPT2DoubleHeadsModel 57 | :members: 58 | -------------------------------------------------------------------------------- /docs/source/model_doc/roberta.rst: -------------------------------------------------------------------------------- 1 | RoBERTa 2 | ---------------------------------------------------- 3 | 4 | ``RobertaConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.RobertaConfig 8 | :members: 9 | 10 | 11 | ``RobertaTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.RobertaTokenizer 15 | :members: 16 | 17 | 18 | ``RobertaModel`` 19 | ~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.RobertaModel 22 | :members: 23 | 24 | 25 | ``RobertaForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.RobertaForMaskedLM 29 | :members: 30 | 31 | 32 | ``RobertaForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.RobertaForSequenceClassification 36 | :members: 37 | 38 | 39 | ``TFRobertaModel`` 40 | ~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFRobertaModel 43 | :members: 44 | 45 | 46 | ``TFRobertaForMaskedLM`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFRobertaForMaskedLM 50 | :members: 51 | 52 | 53 | ``TFRobertaForSequenceClassification`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFRobertaForSequenceClassification 57 | :members: 58 | -------------------------------------------------------------------------------- /docs/source/model_doc/transformerxl.rst: -------------------------------------------------------------------------------- 1 | Transformer XL 2 | ---------------------------------------------------- 3 | 4 | 5 | ``TransfoXLConfig`` 6 | ~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | .. autoclass:: transformers.TransfoXLConfig 9 | :members: 10 | 11 | 12 | ``TransfoXLTokenizer`` 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.TransfoXLTokenizer 16 | :members: 17 | 18 | 19 | ``TransfoXLModel`` 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.TransfoXLModel 23 | :members: 24 | 25 | 26 | ``TransfoXLLMHeadModel`` 27 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 | 29 | .. autoclass:: transformers.TransfoXLLMHeadModel 30 | :members: 31 | 32 | 33 | ``TFTransfoXLModel`` 34 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 35 | 36 | .. autoclass:: transformers.TFTransfoXLModel 37 | :members: 38 | 39 | 40 | ``TFTransfoXLLMHeadModel`` 41 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 42 | 43 | .. 
autoclass:: transformers.TFTransfoXLLMHeadModel 44 | :members: 45 | -------------------------------------------------------------------------------- /docs/source/model_doc/xlm.rst: -------------------------------------------------------------------------------- 1 | XLM 2 | ---------------------------------------------------- 3 | 4 | ``XLMConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.XLMConfig 8 | :members: 9 | 10 | ``XLMTokenizer`` 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: transformers.XLMTokenizer 14 | :members: 15 | 16 | ``XLMModel`` 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: transformers.XLMModel 20 | :members: 21 | 22 | 23 | ``XLMWithLMHeadModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.XLMWithLMHeadModel 27 | :members: 28 | 29 | 30 | ``XLMForSequenceClassification`` 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | .. autoclass:: transformers.XLMForSequenceClassification 34 | :members: 35 | 36 | 37 | ``XLMForQuestionAnswering`` 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: transformers.XLMForQuestionAnswering 41 | :members: 42 | 43 | 44 | ``TFXLMModel`` 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. autoclass:: transformers.TFXLMModel 48 | :members: 49 | 50 | 51 | ``TFXLMWithLMHeadModel`` 52 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | .. autoclass:: transformers.TFXLMWithLMHeadModel 55 | :members: 56 | 57 | 58 | ``TFXLMForSequenceClassification`` 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 60 | 61 | .. autoclass:: transformers.TFXLMForSequenceClassification 62 | :members: 63 | 64 | 65 | ``TFXLMForQuestionAnsweringSimple`` 66 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 67 | 68 | .. autoclass:: transformers.TFXLMForQuestionAnsweringSimple 69 | :members: 70 | -------------------------------------------------------------------------------- /docs/source/model_doc/xlnet.rst: -------------------------------------------------------------------------------- 1 | XLNet 2 | ---------------------------------------------------- 3 | 4 | ``XLNetConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.XLNetConfig 8 | :members: 9 | 10 | 11 | ``XLNetTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.XLNetTokenizer 15 | :members: 16 | 17 | 18 | ``XLNetModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.XLNetModel 22 | :members: 23 | 24 | 25 | ``XLNetLMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.XLNetLMHeadModel 29 | :members: 30 | 31 | 32 | ``XLNetForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.XLNetForSequenceClassification 36 | :members: 37 | 38 | 39 | ``XLNetForQuestionAnswering`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.XLNetForQuestionAnswering 43 | :members: 44 | 45 | 46 | ``TFXLNetModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFXLNetModel 50 | :members: 51 | 52 | 53 | ``TFXLNetLMHeadModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. 
autoclass:: transformers.TFXLNetLMHeadModel
57 | :members:
58 |
59 |
60 | ``TFXLNetForSequenceClassification``
61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 |
63 | .. autoclass:: transformers.TFXLNetForSequenceClassification
64 | :members:
65 |
66 |
67 | ``TFXLNetForQuestionAnsweringSimple``
68 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
69 |
70 | .. autoclass:: transformers.TFXLNetForQuestionAnsweringSimple
71 | :members:
72 |
-------------------------------------------------------------------------------- /docs/source/model_sharing.md: --------------------------------------------------------------------------------
1 | # Model upload and sharing
2 |
3 | Starting with `v2.2.2`, you can now upload and share your fine-tuned models with the community, using the CLI that's built into the library.
4 |
5 | **First, create an account on [https://huggingface.co/join](https://huggingface.co/join)**. Then:
6 |
7 | ```shell
8 | transformers-cli login
9 | # log in using the same credentials as on huggingface.co
10 | ```
11 | Upload your model:
12 | ```shell
13 | transformers-cli upload ./path/to/pretrained_model/
14 |
15 | # ^^ Upload folder containing weights/tokenizer/config
16 | # saved via `.save_pretrained()`
17 |
18 | transformers-cli upload ./config.json [--filename folder/foobar.json]
19 |
20 | # ^^ Upload a single file
21 | # (you can optionally override its filename, which can be nested inside a folder)
22 | ```
23 |
24 | Your model will then be accessible through its identifier, a concatenation of your username and the folder name above:
25 | ```python
26 | "username/pretrained_model"
27 | ```
28 |
29 | Anyone can load it from code:
30 | ```python
31 | tokenizer = AutoTokenizer.from_pretrained("username/pretrained_model")
32 | model = AutoModel.from_pretrained("username/pretrained_model")
33 | ```
34 |
35 | Finally, list all your files on S3:
36 | ```shell
37 | transformers-cli s3 ls
38 | # List all your S3 objects.
39 | ```
40 |
-------------------------------------------------------------------------------- /docs/source/multilingual.rst: --------------------------------------------------------------------------------
1 | Multi-lingual models
2 | ================================================
3 |
4 | Most of the models available in this library are mono-lingual models (English, Chinese and German). A few
5 | multi-lingual models are available and have different mechanisms than mono-lingual models.
6 | This page details the usage of these models.
7 |
8 | The two models that currently support multiple languages are BERT and XLM.
9 |
10 | XLM
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | XLM has a total of 10 different checkpoints, only one of which is mono-lingual.
The 9 remaining model checkpoints can
14 | be split into two categories: the checkpoints that make use of language embeddings, and those that don't.
15 |
16 | XLM & Language Embeddings
17 | ------------------------------------------------
18 |
19 | This section concerns the following checkpoints:
20 |
21 | - ``xlm-mlm-ende-1024`` (Masked language modeling, English-German)
22 | - ``xlm-mlm-enfr-1024`` (Masked language modeling, English-French)
23 | - ``xlm-mlm-enro-1024`` (Masked language modeling, English-Romanian)
24 | - ``xlm-mlm-xnli15-1024`` (Masked language modeling, XNLI languages)
25 | - ``xlm-mlm-tlm-xnli15-1024`` (Masked language modeling + Translation, XNLI languages)
26 | - ``xlm-clm-enfr-1024`` (Causal language modeling, English-French)
27 | - ``xlm-clm-ende-1024`` (Causal language modeling, English-German)
28 |
29 | These checkpoints require language embeddings that will specify the language used at inference time. These language
30 | embeddings are represented as a tensor that is of the same shape as the input ids passed to the model. The values in
31 | these tensors depend on the language used and are identifiable using the ``lang2id`` and ``id2lang`` attributes
32 | from the tokenizer.
33 |
34 | Here is an example using the ``xlm-clm-enfr-1024`` checkpoint (Causal language modeling, English-French):
35 |
36 |
37 | .. code-block::
38 |
39 |     import torch
40 |     from transformers import XLMTokenizer, XLMWithLMHeadModel
41 |
42 |     tokenizer = XLMTokenizer.from_pretrained("xlm-clm-enfr-1024")
43 |     model = XLMWithLMHeadModel.from_pretrained("xlm-clm-enfr-1024")
44 |
45 | The different languages this model/tokenizer handles, as well as the ids of these languages are visible using the
46 | ``lang2id`` attribute:
47 |
48 | .. code-block::
49 |
50 |     print(tokenizer.lang2id)  # {'en': 0, 'fr': 1}
51 |
52 |
53 | These ids should be used when passing a language parameter during a model pass. Let's define our inputs:
54 |
55 | .. code-block::
56 |
57 |     input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")])  # batch size of 1
58 |
59 |
60 | We should now define the language embedding by using the previously defined language id. We want to create a tensor
61 | filled with the appropriate language ids, of the same size as ``input_ids``. For English, the id is 0:
62 |
63 | .. code-block::
64 |
65 |     language_id = tokenizer.lang2id['en']  # 0
66 |     langs = torch.tensor([language_id] * input_ids.shape[1])  # torch.tensor([0, 0, 0, ..., 0])
67 |
68 |     # We reshape it to be of size (batch_size, sequence_length)
69 |     langs = langs.view(1, -1)  # is now of shape [1, sequence_length] (we have a batch size of 1)
70 |
71 |
72 | You can then feed it all as input to your model:
73 |
74 | .. code-block::
75 |
76 |     outputs = model(input_ids, langs=langs)
77 |
78 |
79 | The example script ``run_generation.py`` in the ``examples`` folder
80 | can generate text using the CLM checkpoints from XLM, using the language embeddings.
81 |
82 | XLM without Language Embeddings
83 | ------------------------------------------------
84 |
85 | This section concerns the following checkpoints:
86 |
87 | - ``xlm-mlm-17-1280`` (Masked language modeling, 17 languages)
88 | - ``xlm-mlm-100-1280`` (Masked language modeling, 100 languages)
89 |
90 | These checkpoints do not require language embeddings at inference time. These models produce generic
91 | sentence representations, unlike the previously-mentioned XLM checkpoints.
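Such a representation can be extracted with ``XLMModel``. A minimal sketch (checkpoint names as listed above;
note that no ``langs`` argument is involved at all):

.. code-block::

    import torch
    from transformers import XLMTokenizer, XLMModel

    tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-17-1280")
    model = XLMModel.from_pretrained("xlm-mlm-17-1280")

    input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")])  # batch size of 1
    sequence_output = model(input_ids)[0]  # shape (batch_size, sequence_length, hidden_size)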
92 |
93 |
94 | BERT
95 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96 |
97 | BERT has two checkpoints that can be used for multi-lingual tasks:
98 |
99 | - ``bert-base-multilingual-uncased`` (Masked language modeling + Next sentence prediction, 102 languages)
100 | - ``bert-base-multilingual-cased`` (Masked language modeling + Next sentence prediction, 104 languages)
101 |
102 | These checkpoints do not require language embeddings at inference time. They should identify the language
103 | used in the context and infer accordingly.
-------------------------------------------------------------------------------- /docs/source/notebooks.rst: --------------------------------------------------------------------------------
1 | Notebooks
2 | ================================================
3 |
4 | We include three Jupyter notebooks (in the ``notebooks`` folder of the repository) that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model.
5 |
6 |
7 | *
8 |   The first notebook (``Comparing-TF-and-PT-models.ipynb``) extracts the hidden states of a full sequence on each layer of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden states of the models.
9 |
10 | *
11 |   The second notebook (``Comparing-TF-and-PT-models-SQuAD.ipynb``) compares the loss computed by the TensorFlow and the PyTorch models for identical initialization of the fine-tuning layer of ``BertForQuestionAnswering`` and computes the standard deviation between them. In the given example, we get a standard deviation of 2.5e-7 between the models.
12 |
13 | *
14 |   The third notebook (``Comparing-TF-and-PT-models-MLM-NSP.ipynb``) compares the predictions computed by the TensorFlow and the PyTorch models for masked language modeling using the pre-trained masked language model.
15 |
16 | Please follow the instructions given in the notebooks to run and modify them.
17 |
-------------------------------------------------------------------------------- /examples/contrib/README.md: --------------------------------------------------------------------------------
1 | # Community contributed examples
2 |
3 | This folder contains examples which are not actively maintained (mostly contributed by the community).
4 |
5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working.
6 |
-------------------------------------------------------------------------------- /examples/contrib/run_camembert.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 | from transformers.modeling_camembert import CamembertForMaskedLM
4 | from transformers.tokenization_camembert import CamembertTokenizer
5 |
6 |
7 | def fill_mask(masked_input, model, tokenizer, topk=5):
8 |     # Adapted from https://github.com/pytorch/fairseq/blob/master/fairseq/models/roberta/hub_interface.py
9 |     assert masked_input.count("<mask>") == 1
10 |     input_ids = torch.tensor(tokenizer.encode(masked_input, add_special_tokens=True)).unsqueeze(0)  # Batch size 1
11 |     logits = model(input_ids)[0]  # The prediction scores are the first element of the output tuple
12 |     masked_index = (input_ids.squeeze() == tokenizer.mask_token_id).nonzero().item()
13 |     logits = logits[0, masked_index, :]
14 |     prob = logits.softmax(dim=0)
15 |     values, indices = prob.topk(k=topk, dim=0)
16 |     topk_predicted_token_bpe = " ".join(
17 |         [tokenizer.convert_ids_to_tokens(indices[i].item()) for i in range(len(indices))]
18 |     )
19 |     masked_token = tokenizer.mask_token
20 |     topk_filled_outputs = []
21 |     for index, predicted_token_bpe in enumerate(topk_predicted_token_bpe.split(" ")):
22 |         predicted_token = predicted_token_bpe.replace("\u2581", " ")
23 |         if " {0}".format(masked_token) in masked_input:
24 |             topk_filled_outputs.append(
25 |                 (
26 |                     masked_input.replace(" {0}".format(masked_token), predicted_token),
27 |                     values[index].item(),
28 |                     predicted_token,
29 |                 )
30 |             )
31 |         else:
32 |             topk_filled_outputs.append(
33 |                 (masked_input.replace(masked_token, predicted_token), values[index].item(), predicted_token,)
34 |             )
35 |     return topk_filled_outputs
36 |
37 |
38 | tokenizer = CamembertTokenizer.from_pretrained("camembert-base")
39 | model = CamembertForMaskedLM.from_pretrained("camembert-base")
40 | model.eval()
41 |
42 | masked_input = "Le camembert est <mask> :)"
43 | print(fill_mask(masked_input, model, tokenizer, topk=3))
44 |
-------------------------------------------------------------------------------- /examples/distillation/requirements.txt: --------------------------------------------------------------------------------
1 | transformers
2 |
3 | gitpython==3.0.2
4 | tensorboard>=1.14.0
5 | tensorboardX==1.8
6 | psutil==5.6.3
7 | scipy==1.3.1
8 |
-------------------------------------------------------------------------------- /examples/distillation/scripts/binarized_data.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019-present, the HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Preprocessing script before distillation.
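Example invocation (a sketch using the script's own argparse defaults below; adjust the paths to your data):

    python scripts/binarized_data.py --file_path data/dump.txt \
        --tokenizer_type bert --tokenizer_name bert-base-uncased \
        --dump_file data/dump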
17 | """ 18 | import argparse 19 | import logging 20 | import pickle 21 | import random 22 | import time 23 | 24 | import numpy as np 25 | 26 | from transformers import BertTokenizer, GPT2Tokenizer, RobertaTokenizer 27 | 28 | 29 | logging.basicConfig( 30 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO 31 | ) 32 | logger = logging.getLogger(__name__) 33 | 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser( 37 | description="Preprocess the data to avoid re-doing it several times by (tokenization + token_to_ids)." 38 | ) 39 | parser.add_argument("--file_path", type=str, default="data/dump.txt", help="The path to the data.") 40 | parser.add_argument("--tokenizer_type", type=str, default="bert", choices=["bert", "roberta", "gpt2"]) 41 | parser.add_argument("--tokenizer_name", type=str, default="bert-base-uncased", help="The tokenizer to use.") 42 | parser.add_argument("--dump_file", type=str, default="data/dump", help="The dump file prefix.") 43 | args = parser.parse_args() 44 | 45 | logger.info(f"Loading Tokenizer ({args.tokenizer_name})") 46 | if args.tokenizer_type == "bert": 47 | tokenizer = BertTokenizer.from_pretrained(args.tokenizer_name) 48 | bos = tokenizer.special_tokens_map["cls_token"] # `[CLS]` 49 | sep = tokenizer.special_tokens_map["sep_token"] # `[SEP]` 50 | elif args.tokenizer_type == "roberta": 51 | tokenizer = RobertaTokenizer.from_pretrained(args.tokenizer_name) 52 | bos = tokenizer.special_tokens_map["cls_token"] # `` 53 | sep = tokenizer.special_tokens_map["sep_token"] # `` 54 | elif args.tokenizer_type == "gpt2": 55 | tokenizer = GPT2Tokenizer.from_pretrained(args.tokenizer_name) 56 | bos = tokenizer.special_tokens_map["bos_token"] # `<|endoftext|>` 57 | sep = tokenizer.special_tokens_map["eos_token"] # `<|endoftext|>` 58 | 59 | logger.info(f"Loading text from {args.file_path}") 60 | with open(args.file_path, "r", encoding="utf8") as fp: 61 | data = fp.readlines() 62 | 63 | logger.info(f"Start encoding") 64 | logger.info(f"{len(data)} examples to process.") 65 | 66 | rslt = [] 67 | iter = 0 68 | interval = 10000 69 | start = time.time() 70 | for text in data: 71 | text = f"{bos} {text.strip()} {sep}" 72 | token_ids = tokenizer.encode(text, add_special_tokens=False) 73 | rslt.append(token_ids) 74 | 75 | iter += 1 76 | if iter % interval == 0: 77 | end = time.time() 78 | logger.info(f"{iter} examples processed. - {(end-start)/interval:.2f}s/expl") 79 | start = time.time() 80 | logger.info("Finished binarization") 81 | logger.info(f"{len(data)} examples processed.") 82 | 83 | dp_file = f"{args.dump_file}.{args.tokenizer_name}.pickle" 84 | rslt_ = [np.uint16(d) for d in rslt] 85 | random.shuffle(rslt_) 86 | logger.info(f"Dump to {dp_file}") 87 | with open(dp_file, "wb") as handle: 88 | pickle.dump(rslt_, handle, protocol=pickle.HIGHEST_PROTOCOL) 89 | 90 | 91 | if __name__ == "__main__": 92 | main() 93 | -------------------------------------------------------------------------------- /examples/distillation/scripts/extract_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Preprocessing script before training DistilBERT.
17 | Specific to BERT -> DistilBERT.
18 | """
19 | import argparse
20 |
21 | import torch
22 |
23 | from transformers import BertForMaskedLM
24 |
25 |
26 | if __name__ == "__main__":
27 |     parser = argparse.ArgumentParser(
28 |         description="Extract some layers of the full BertForMaskedLM or RobertaForMaskedLM for Transfer Learned Distillation"
29 |     )
30 |     parser.add_argument("--model_type", default="bert", choices=["bert"])
31 |     parser.add_argument("--model_name", default="bert-base-uncased", type=str)
32 |     parser.add_argument("--dump_checkpoint", default="serialization_dir/tf_bert-base-uncased_0247911.pth", type=str)
33 |     parser.add_argument("--vocab_transform", action="store_true")
34 |     args = parser.parse_args()
35 |
36 |     if args.model_type == "bert":
37 |         model = BertForMaskedLM.from_pretrained(args.model_name)
38 |         prefix = "bert"
39 |     else:
40 |         raise ValueError('args.model_type should be "bert".')
41 |
42 |     state_dict = model.state_dict()
43 |     compressed_sd = {}
44 |
45 |     for w in ["word_embeddings", "position_embeddings"]:
46 |         compressed_sd[f"distilbert.embeddings.{w}.weight"] = state_dict[f"{prefix}.embeddings.{w}.weight"]
47 |     for w in ["weight", "bias"]:
48 |         compressed_sd[f"distilbert.embeddings.LayerNorm.{w}"] = state_dict[f"{prefix}.embeddings.LayerNorm.{w}"]
49 |
50 |     std_idx = 0
51 |     for teacher_idx in [0, 2, 4, 7, 9, 11]:
52 |         for w in ["weight", "bias"]:
53 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.attention.q_lin.{w}"] = state_dict[
54 |                 f"{prefix}.encoder.layer.{teacher_idx}.attention.self.query.{w}"
55 |             ]
56 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.attention.k_lin.{w}"] = state_dict[
57 |                 f"{prefix}.encoder.layer.{teacher_idx}.attention.self.key.{w}"
58 |             ]
59 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.attention.v_lin.{w}"] = state_dict[
60 |                 f"{prefix}.encoder.layer.{teacher_idx}.attention.self.value.{w}"
61 |             ]
62 |
63 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.attention.out_lin.{w}"] = state_dict[
64 |                 f"{prefix}.encoder.layer.{teacher_idx}.attention.output.dense.{w}"
65 |             ]
66 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.sa_layer_norm.{w}"] = state_dict[
67 |                 f"{prefix}.encoder.layer.{teacher_idx}.attention.output.LayerNorm.{w}"
68 |             ]
69 |
70 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.ffn.lin1.{w}"] = state_dict[
71 |                 f"{prefix}.encoder.layer.{teacher_idx}.intermediate.dense.{w}"
72 |             ]
73 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.ffn.lin2.{w}"] = state_dict[
74 |                 f"{prefix}.encoder.layer.{teacher_idx}.output.dense.{w}"
75 |             ]
76 |             compressed_sd[f"distilbert.transformer.layer.{std_idx}.output_layer_norm.{w}"] = state_dict[
77 |                 f"{prefix}.encoder.layer.{teacher_idx}.output.LayerNorm.{w}"
78 |             ]
79 |         std_idx += 1
80 |
81 |     compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"]
82 |     compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"]
83 |     if args.vocab_transform:
84 |         for w in ["weight", "bias"]:
85 |
compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"] 86 | compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"] 87 | 88 | print(f"N layers selected for distillation: {std_idx}") 89 | print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}") 90 | 91 | print(f"Save transfered checkpoint to {args.dump_checkpoint}.") 92 | torch.save(compressed_sd, args.dump_checkpoint) 93 | -------------------------------------------------------------------------------- /examples/distillation/scripts/token_counts.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Preprocessing script before training the distilled model. 17 | """ 18 | import argparse 19 | import logging 20 | import pickle 21 | from collections import Counter 22 | 23 | 24 | logging.basicConfig( 25 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO 26 | ) 27 | logger = logging.getLogger(__name__) 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser( 31 | description="Token Counts for smoothing the masking probabilities in MLM (cf XLM/word2vec)" 32 | ) 33 | parser.add_argument( 34 | "--data_file", type=str, default="data/dump.bert-base-uncased.pickle", help="The binarized dataset." 35 | ) 36 | parser.add_argument( 37 | "--token_counts_dump", type=str, default="data/token_counts.bert-base-uncased.pickle", help="The dump file." 
38 |     )
39 |     parser.add_argument("--vocab_size", default=30522, type=int)
40 |     args = parser.parse_args()
41 |
42 |     logger.info(f"Loading data from {args.data_file}")
43 |     with open(args.data_file, "rb") as fp:
44 |         data = pickle.load(fp)
45 |
46 |     logger.info("Counting occurrences for MLM.")
47 |     counter = Counter()
48 |     for tk_ids in data:
49 |         counter.update(tk_ids)
50 |     counts = [0] * args.vocab_size
51 |     for k, v in counter.items():
52 |         counts[k] = v
53 |
54 |     logger.info(f"Dump to {args.token_counts_dump}")
55 |     with open(args.token_counts_dump, "wb") as handle:
56 |         pickle.dump(counts, handle, protocol=pickle.HIGHEST_PROTOCOL)
57 |
-------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-multilingual-cased.json: --------------------------------------------------------------------------------
1 | {
2 |     "activation": "gelu",
3 |     "attention_dropout": 0.1,
4 |     "dim": 768,
5 |     "dropout": 0.1,
6 |     "hidden_dim": 3072,
7 |     "initializer_range": 0.02,
8 |     "max_position_embeddings": 512,
9 |     "n_heads": 12,
10 |     "n_layers": 6,
11 |     "sinusoidal_pos_embds": true,
12 |     "tie_weights_": true,
13 |     "vocab_size": 119547
14 | }
15 |
-------------------------------------------------------------------------------- /examples/distillation/training_configs/distilbert-base-uncased.json: --------------------------------------------------------------------------------
1 | {
2 |     "activation": "gelu",
3 |     "attention_dropout": 0.1,
4 |     "dim": 768,
5 |     "dropout": 0.1,
6 |     "hidden_dim": 3072,
7 |     "initializer_range": 0.02,
8 |     "max_position_embeddings": 512,
9 |     "n_heads": 12,
10 |     "n_layers": 6,
11 |     "sinusoidal_pos_embds": true,
12 |     "tie_weights_": true,
13 |     "vocab_size": 30522
14 | }
15 |
-------------------------------------------------------------------------------- /examples/distillation/training_configs/distilgpt2.json: --------------------------------------------------------------------------------
1 | {
2 |     "initializer_range": 0.02,
3 |     "layer_norm_epsilon": 0.00001,
4 |     "n_ctx": 1024,
5 |     "n_embd": 768,
6 |     "n_head": 12,
7 |     "n_layer": 6,
8 |     "n_positions": 1024,
9 |     "vocab_size": 50257
10 | }
-------------------------------------------------------------------------------- /examples/distillation/training_configs/distilroberta-base.json: --------------------------------------------------------------------------------
1 | {
2 |     "vocab_size": 50265,
3 |     "hidden_size": 768,
4 |     "num_hidden_layers": 6,
5 |     "num_attention_heads": 12,
6 |     "intermediate_size": 3072,
7 |     "hidden_act": "gelu",
8 |     "hidden_dropout_prob": 0.1,
9 |     "attention_probs_dropout_prob": 0.1,
10 |     "max_position_embeddings": 514,
11 |     "type_vocab_size": 1,
12 |     "initializer_range": 0.02,
13 |     "layer_norm_eps": 0.00001
14 | }
-------------------------------------------------------------------------------- /examples/pplm/README.md: --------------------------------------------------------------------------------
1 | # Plug and Play Language Models: a Simple Approach to Controlled Text Generation
2 |
3 | Authors: [Sumanth Dathathri](https://dathath.github.io/), [Andrea Madotto](https://andreamad8.github.io/), Janice Lan, Jane Hung, Eric Frank, [Piero Molino](https://w4nderlu.st/), [Jason Yosinski](http://yosinski.com/), and [Rosanne Liu](http://www.rosanneliu.com/)
4 |
5 | This folder contains the original code used to run the Plug and Play Language Model (PPLM).
6 |
7 | Paper link: https://arxiv.org/abs/1912.02164
8 |
9 | Blog link: https://eng.uber.com/pplm
10 |
11 | Please check out the repo under uber-research for more information: https://github.com/uber-research/PPLM
12 |
13 |
14 | ## Setup
15 |
16 | ```bash
17 | git clone https://github.com/huggingface/transformers && cd transformers
18 | pip install .
19 | pip install nltk torchtext # additional requirements.
20 | cd examples/pplm
21 | ```
22 |
23 | ## PPLM-BoW
24 |
25 | ### Example command for bag-of-words control
26 |
27 | ```bash
28 | python run_pplm.py -B military --cond_text "The potato" --length 50 --gamma 1.5 --num_iterations 3 --num_samples 10 --stepsize 0.03 --window_length 5 --kl_scale 0.01 --gm_scale 0.99 --colorama --sample
29 | ```
30 |
31 | ### Tuning hyperparameters for bag-of-words control
32 |
33 | 1. Increase `--stepsize` to intensify topic control, and decrease its value to soften the control. `--stepsize 0` recovers the original uncontrolled GPT-2 model.
34 |
35 | 2. If the language being generated is repetitive (e.g. "science science experiment experiment"), there are several options to consider:
36 | a) Reduce the `--stepsize`
37 | 	b) Increase `--kl_scale` (the KL-loss coefficient) or decrease `--gm_scale` (the gm-scaling term; see the sketch after this list)
38 | 	c) Add `--grad-length xx` where xx is an integer <= length (e.g. `--grad-length 30`).
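For intuition on `--gm_scale`: following the PPLM paper, the perturbed and unperturbed next-token distributions are fused with a weighted geometric mean before sampling, and `--gm_scale` is the weight on the perturbed distribution. A toy numpy sketch of that fusion (illustrative only; the real computation lives in `run_pplm.py`):

```python
import numpy as np

def fuse(p_pert, p_orig, gm_scale):
    # weighted geometric mean of the two distributions, renormalized
    p = (p_pert ** gm_scale) * (p_orig ** (1.0 - gm_scale))
    return p / p.sum()

p_orig = np.array([0.7, 0.2, 0.1])  # what the unmodified LM wants to say
p_pert = np.array([0.1, 0.2, 0.7])  # gradient-perturbed, topic-steered distribution

print(fuse(p_pert, p_orig, 0.99))  # close to the steered distribution
print(fuse(p_pert, p_orig, 0.50))  # a compromise; lowering --gm_scale pulls output back toward fluent text
```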
39 | 40 | 41 | ## PPLM-Discrim 42 | 43 | ### Example command for discriminator based sentiment control 44 | 45 | ```bash 46 | python run_pplm.py -D sentiment --class_label 2 --cond_text "My dog died" --length 50 --gamma 1.0 --num_iterations 10 --num_samples 10 --stepsize 0.04 --kl_scale 0.01 --gm_scale 0.95 --sample 47 | ``` 48 | 49 | ### Tuning hyperparameters for discriminator control 50 | 51 | 1. Increase `--stepsize` to intensify topic control, and decrease its value to soften the control. `--stepsize 0` recovers the original uncontrolled GPT-2 model. 52 | 53 | 2. Use `--class_label 3` for negative, and `--class_label 2` for positive 54 | 55 | -------------------------------------------------------------------------------- /examples/pplm/imgs/headfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/examples/pplm/imgs/headfigure.png -------------------------------------------------------------------------------- /examples/pplm/imgs/wooly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/examples/pplm/imgs/wooly.png -------------------------------------------------------------------------------- /examples/pplm/pplm_classification_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClassificationHead(torch.nn.Module): 5 | """Classification Head for transformer encoders""" 6 | 7 | def __init__(self, class_size, embed_size): 8 | super(ClassificationHead, self).__init__() 9 | self.class_size = class_size 10 | self.embed_size = embed_size 11 | # self.mlp1 = torch.nn.Linear(embed_size, embed_size) 12 | # self.mlp2 = (torch.nn.Linear(embed_size, class_size)) 13 | self.mlp = torch.nn.Linear(embed_size, class_size) 14 | 15 | def forward(self, hidden_state): 16 | # hidden_state = F.relu(self.mlp1(hidden_state)) 17 | # hidden_state = self.mlp2(hidden_state) 18 | logits = self.mlp(hidden_state) 19 | return logits 20 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | tensorboard 3 | scikit-learn 4 | seqeval 5 | -------------------------------------------------------------------------------- /examples/run_tf_glue.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | import tensorflow_datasets 5 | 6 | from transformers import ( 7 | BertConfig, 8 | BertForSequenceClassification, 9 | BertTokenizer, 10 | TFBertForSequenceClassification, 11 | glue_convert_examples_to_features, 12 | glue_processors, 13 | ) 14 | 15 | 16 | # script parameters 17 | BATCH_SIZE = 32 18 | EVAL_BATCH_SIZE = BATCH_SIZE * 2 19 | USE_XLA = False 20 | USE_AMP = False 21 | EPOCHS = 3 22 | 23 | TASK = "mrpc" 24 | 25 | if TASK == "sst-2": 26 | TFDS_TASK = "sst2" 27 | elif TASK == "sts-b": 28 | TFDS_TASK = "stsb" 29 | else: 30 | TFDS_TASK = TASK 31 | 32 | num_labels = len(glue_processors[TASK]().get_labels()) 33 | print(num_labels) 34 | 35 | tf.config.optimizer.set_jit(USE_XLA) 36 | tf.config.optimizer.set_experimental_options({"auto_mixed_precision": USE_AMP}) 37 | 38 | # Load tokenizer and model from pretrained model/vocabulary. 
Specify the number of labels to classify (2+: classification, 1: regression) 39 | config = BertConfig.from_pretrained("bert-base-cased", num_labels=num_labels) 40 | tokenizer = BertTokenizer.from_pretrained("bert-base-cased") 41 | model = TFBertForSequenceClassification.from_pretrained("bert-base-cased", config=config) 42 | 43 | # Load dataset via TensorFlow Datasets 44 | data, info = tensorflow_datasets.load(f"glue/{TFDS_TASK}", with_info=True) 45 | train_examples = info.splits["train"].num_examples 46 | 47 | # MNLI expects either validation_matched or validation_mismatched 48 | valid_examples = info.splits["validation"].num_examples 49 | 50 | # Prepare dataset for GLUE as a tf.data.Dataset instance 51 | train_dataset = glue_convert_examples_to_features(data["train"], tokenizer, 128, TASK) 52 | 53 | # MNLI expects either validation_matched or validation_mismatched 54 | valid_dataset = glue_convert_examples_to_features(data["validation"], tokenizer, 128, TASK) 55 | train_dataset = train_dataset.shuffle(128).batch(BATCH_SIZE).repeat(-1) 56 | valid_dataset = valid_dataset.batch(EVAL_BATCH_SIZE) 57 | 58 | # Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule 59 | opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08) 60 | if USE_AMP: 61 | # loss scaling is currently required when using mixed precision 62 | opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, "dynamic") 63 | 64 | 65 | if num_labels == 1: 66 | loss = tf.keras.losses.MeanSquaredError() 67 | else: 68 | loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) 69 | 70 | metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy") 71 | model.compile(optimizer=opt, loss=loss, metrics=[metric]) 72 | 73 | # Train and evaluate using tf.keras.Model.fit() 74 | train_steps = train_examples // BATCH_SIZE 75 | valid_steps = valid_examples // EVAL_BATCH_SIZE 76 | 77 | history = model.fit( 78 | train_dataset, 79 | epochs=EPOCHS, 80 | steps_per_epoch=train_steps, 81 | validation_data=valid_dataset, 82 | validation_steps=valid_steps, 83 | ) 84 | 85 | # Save TF2 model 86 | os.makedirs("./save/", exist_ok=True) 87 | model.save_pretrained("./save/") 88 | 89 | if TASK == "mrpc": 90 | # Load the TensorFlow model in PyTorch for inspection 91 | # This is to demo the interoperability between the two frameworks, you don't have to 92 | # do this in real life (you can run the inference on the TF model). 93 | pytorch_model = BertForSequenceClassification.from_pretrained("./save/", from_tf=True) 94 | 95 | # Quickly test a few predictions - MRPC is a paraphrasing task, let's see if our model learned the task 96 | sentence_0 = "This research was consistent with his findings." 97 | sentence_1 = "His findings were compatible with this research." 98 | sentence_2 = "His findings were not compatible with this research." 
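    # encode_plus encodes the two sentences as one paired sequence; besides input_ids,
    # token_type_ids and attention_mask it also returns a "special_tokens_mask" entry,
    # which is deleted below because the model's forward pass does not accept it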
99 |     inputs_1 = tokenizer.encode_plus(sentence_0, sentence_1, add_special_tokens=True, return_tensors="pt")
100 |     inputs_2 = tokenizer.encode_plus(sentence_0, sentence_2, add_special_tokens=True, return_tensors="pt")
101 |
102 |     del inputs_1["special_tokens_mask"]
103 |     del inputs_2["special_tokens_mask"]
104 |
105 |     pred_1 = pytorch_model(**inputs_1)[0].argmax().item()
106 |     pred_2 = pytorch_model(**inputs_2)[0].argmax().item()
107 |     print("sentence_1 is", "a paraphrase" if pred_1 else "not a paraphrase", "of sentence_0")
108 |     print("sentence_2 is", "a paraphrase" if pred_2 else "not a paraphrase", "of sentence_0")
109 |
-------------------------------------------------------------------------------- /examples/summarization/README.md: --------------------------------------------------------------------------------
1 | # Text Summarization with Pretrained Encoders
2 |
3 | This folder contains part of the code necessary to reproduce the results on abstractive summarization from the article [Text Summarization with Pretrained Encoders](https://arxiv.org/pdf/1908.08345.pdf) by [Yang Liu](https://nlp-yang.github.io/) and [Mirella Lapata](https://homepages.inf.ed.ac.uk/mlap/). It can also be used to summarize any document.
4 |
5 | The original code can be found on Yang Liu's [GitHub repository](https://github.com/nlpyang/PreSumm).
6 |
7 | The model is loaded with the pre-trained weights for the abstractive summarization model trained on the CNN/Daily Mail dataset, first on an extractive and then on an abstractive objective.
8 |
9 | ## Setup
10 |
11 | ```
12 | git clone https://github.com/huggingface/transformers && cd transformers
13 | pip install .
14 | pip install nltk py-rouge
15 | cd examples/summarization
16 | ```
17 |
18 | ## Reproduce the authors' results on ROUGE
19 |
20 | To be able to reproduce the authors' results on the CNN/Daily Mail dataset you first need to download both the CNN and Daily Mail datasets [from Kyunghyun Cho's website](https://cs.nyu.edu/~kcho/DMQA/) (the links next to "Stories") in the same folder. Then uncompress the archives by running:
21 |
22 | ```bash
23 | tar -xvf cnn_stories.tgz && tar -xvf dailymail_stories.tgz
24 | ```
25 |
26 | And move all the stories to the same folder. We will refer to the path where you uncompressed both archives as `$DATA_PATH`. Then run the following in the same folder as `run_summarization.py`:
27 |
28 | ```bash
29 | python run_summarization.py \
30 |     --documents_dir $DATA_PATH \
31 |     --summaries_output_dir $SUMMARIES_PATH \ # optional
32 |     --no_cuda false \
33 |     --batch_size 4 \
34 |     --min_length 50 \
35 |     --max_length 200 \
36 |     --beam_size 5 \
37 |     --alpha 0.95 \
38 |     --block_trigram true \
39 |     --compute_rouge true
40 | ```
41 |
42 | The script executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not supported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
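For a quick, standalone sanity check of ROUGE on a single (hypothesis, reference) pair, the `py-rouge` package installed above can be used directly. A minimal sketch based on py-rouge's documented interface (its scores do not exactly match the official perl ROUGE setup used in the paper):

```python
import rouge

evaluator = rouge.Rouge(metrics=["rouge-n", "rouge-l"], max_n=2, apply_avg=True)
hypothesis = "the cat sat on the mat"
reference = "a cat was sitting on the mat"

scores = evaluator.get_scores([hypothesis], [reference])
print(scores["rouge-l"])  # {'f': ..., 'p': ..., 'r': ...}
```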
43 |
44 | ## Summarize any text
45 |
46 | Put the documents that you would like to summarize in a folder (the path to which is referred to as `$DATA_PATH` below) and run the following in the same folder as `run_summarization.py`:
47 |
48 | ```bash
49 | python run_summarization.py \
50 |     --documents_dir $DATA_PATH \
51 |     --summaries_output_dir $SUMMARIES_PATH \ # optional
52 |     --no_cuda false \
53 |     --batch_size 4 \
54 |     --min_length 50 \
55 |     --max_length 200 \
56 |     --beam_size 5 \
57 |     --alpha 0.95 \
58 |     --block_trigram true
59 | ```
60 |
61 | You may want to play around with `min_length`, `max_length` and `alpha` to suit your use case. If you want to compute ROUGE on another dataset you will need to tweak the stories/summaries import in `utils_summarization.py` and tell it where to fetch the reference summaries.
62 |
-------------------------------------------------------------------------------- /examples/summarization/configuration_bertabs.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The HuggingFace Inc. team.
3 | # Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """ BertAbs configuration """
17 | import logging
18 |
19 | from transformers import PretrainedConfig
20 |
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | BERTABS_FINETUNED_CONFIG_MAP = {
26 |     "bertabs-finetuned-cnndm": "https://s3.amazonaws.com/models.huggingface.co/bert/remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization-config.json",
27 | }
28 |
29 |
30 | class BertAbsConfig(PretrainedConfig):
31 |     r""" Class to store the configuration of the BertAbs model.
32 |
33 |     Arguments:
34 |         vocab_size: int
35 |             Number of tokens in the vocabulary.
36 |         max_pos: int
37 |             The maximum sequence length that this model will be used with.
38 |         enc_layers: int
39 |             The number of hidden layers in the Transformer encoder.
40 |         enc_hidden_size: int
41 |             The size of the encoder's layers.
42 |         enc_heads: int
43 |             The number of attention heads for each attention layer in the encoder.
44 |         enc_ff_size: int
45 |             The size of the encoder's feed-forward layers.
46 |         enc_dropout: int
47 |             The dropout probability for all fully connected layers in the
48 |             embeddings, layers, pooler and also the attention probabilities in
49 |             the encoder.
50 |         dec_layers: int
51 |             The number of hidden layers in the decoder.
52 |         dec_hidden_size: int
53 |             The size of the decoder's layers.
54 |         dec_heads: int
55 |             The number of attention heads for each attention layer in the decoder.
56 |         dec_ff_size: int
57 |             The size of the decoder's feed-forward layers.
58 |         dec_dropout: int
59 |             The dropout probability for all fully connected layers in the
60 |             embeddings, layers, pooler and also the attention probabilities in
61 |             the decoder.
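    Example (a minimal sketch; any argument left out falls back to the
    defaults visible in ``__init__`` below)::

        config = BertAbsConfig(enc_layers=6, dec_layers=6, dec_hidden_size=768)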
62 | """ 63 | 64 | pretrained_config_archive_map = BERTABS_FINETUNED_CONFIG_MAP 65 | 66 | def __init__( 67 | self, 68 | vocab_size=30522, 69 | max_pos=512, 70 | enc_layers=6, 71 | enc_hidden_size=512, 72 | enc_heads=8, 73 | enc_ff_size=512, 74 | enc_dropout=0.2, 75 | dec_layers=6, 76 | dec_hidden_size=768, 77 | dec_heads=8, 78 | dec_ff_size=2048, 79 | dec_dropout=0.2, 80 | **kwargs, 81 | ): 82 | super(BertAbsConfig, self).__init__(**kwargs) 83 | 84 | self.vocab_size = vocab_size 85 | self.max_pos = max_pos 86 | 87 | self.enc_layers = enc_layers 88 | self.enc_hidden_size = enc_hidden_size 89 | self.enc_heads = enc_heads 90 | self.enc_ff_size = enc_ff_size 91 | self.enc_dropout = enc_dropout 92 | 93 | self.dec_layers = dec_layers 94 | self.dec_hidden_size = dec_hidden_size 95 | self.dec_heads = dec_heads 96 | self.dec_ff_size = dec_ff_size 97 | self.dec_dropout = dec_dropout 98 | -------------------------------------------------------------------------------- /examples/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | 3 | # For ROUGE 4 | nltk 5 | py-rouge 6 | -------------------------------------------------------------------------------- /examples/test_examples.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc.. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import argparse 18 | import logging 19 | import sys 20 | import unittest 21 | from unittest.mock import patch 22 | 23 | import run_generation 24 | import run_glue 25 | import run_squad 26 | 27 | 28 | logging.basicConfig(level=logging.DEBUG) 29 | 30 | logger = logging.getLogger() 31 | 32 | 33 | def get_setup_file(): 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("-f") 36 | args = parser.parse_args() 37 | return args.f 38 | 39 | 40 | class ExamplesTests(unittest.TestCase): 41 | def test_run_glue(self): 42 | stream_handler = logging.StreamHandler(sys.stdout) 43 | logger.addHandler(stream_handler) 44 | 45 | testargs = [ 46 | "run_glue.py", 47 | "--data_dir=./examples/tests_samples/MRPC/", 48 | "--task_name=mrpc", 49 | "--do_train", 50 | "--do_eval", 51 | "--output_dir=./examples/tests_samples/temp_dir", 52 | "--per_gpu_train_batch_size=2", 53 | "--per_gpu_eval_batch_size=1", 54 | "--learning_rate=1e-4", 55 | "--max_steps=10", 56 | "--warmup_steps=2", 57 | "--overwrite_output_dir", 58 | "--seed=42", 59 | ] 60 | model_type, model_name = ("--model_type=bert", "--model_name_or_path=bert-base-uncased") 61 | with patch.object(sys, "argv", testargs + [model_type, model_name]): 62 | result = run_glue.main() 63 | for value in result.values(): 64 | self.assertGreaterEqual(value, 0.75) 65 | 66 | def test_run_squad(self): 67 | stream_handler = logging.StreamHandler(sys.stdout) 68 | logger.addHandler(stream_handler) 69 | 70 | testargs = [ 71 | "run_squad.py", 72 | "--data_dir=./examples/tests_samples/SQUAD", 73 | "--model_name=bert-base-uncased", 74 | "--output_dir=./examples/tests_samples/temp_dir", 75 | "--max_steps=10", 76 | "--warmup_steps=2", 77 | "--do_train", 78 | "--do_eval", 79 | "--version_2_with_negative", 80 | "--learning_rate=2e-4", 81 | "--per_gpu_train_batch_size=2", 82 | "--per_gpu_eval_batch_size=1", 83 | "--overwrite_output_dir", 84 | "--seed=42", 85 | ] 86 | model_type, model_name = ("--model_type=bert", "--model_name_or_path=bert-base-uncased") 87 | with patch.object(sys, "argv", testargs + [model_type, model_name]): 88 | result = run_squad.main() 89 | self.assertGreaterEqual(result["f1"], 30) 90 | self.assertGreaterEqual(result["exact"], 30) 91 | 92 | def test_generation(self): 93 | stream_handler = logging.StreamHandler(sys.stdout) 94 | logger.addHandler(stream_handler) 95 | 96 | testargs = ["run_generation.py", "--prompt=Hello", "--length=10", "--seed=42"] 97 | model_type, model_name = ("--model_type=openai-gpt", "--model_name_or_path=openai-gpt") 98 | with patch.object(sys, "argv", testargs + [model_type, model_name]): 99 | result = run_generation.main() 100 | self.assertGreaterEqual(len(result), 10) 101 | -------------------------------------------------------------------------------- /examples/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /examples/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . 
His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /examples/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 
8 |
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [isort]
2 | ensure_newline_before_comments = True
3 | force_grid_wrap = 0
4 | include_trailing_comma = True
5 | known_first_party = transformers
6 | known_third_party =
7 |     fairseq
8 |     fastprogress
9 |     git
10 |     MeCab
11 |     nltk
12 |     packaging
13 |     PIL
14 |     psutil
15 |     seqeval
16 |     sklearn
17 |     tensorboardX
18 |     tensorflow_datasets
19 |     torchtext
20 |     torchvision
21 |
22 | line_length = 119
23 | lines_after_imports = 2
24 | multi_line_output = 3
25 | use_parentheses = True
26 |
27 | [flake8]
28 | ignore = E203, E501, W503
29 | max-line-length = 119
30 |
-------------------------------------------------------------------------------- /src/transformers/commands/__init__.py: --------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from argparse import ArgumentParser
3 |
4 |
5 | class BaseTransformersCLICommand(ABC):
6 |     @staticmethod
7 |     @abstractmethod
8 |     def register_subcommand(parser: ArgumentParser):
9 |         raise NotImplementedError()
10 |
11 |     @abstractmethod
12 |     def run(self):
13 |         raise NotImplementedError()
14 |
-------------------------------------------------------------------------------- /src/transformers/commands/download.py: --------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | from transformers.commands import BaseTransformersCLICommand
4 |
5 |
6 | def download_command_factory(args):
7 |     return DownloadCommand(args.model, args.cache_dir, args.force)
8 |
9 |
10 | class DownloadCommand(BaseTransformersCLICommand):
11 |     @staticmethod
12 |     def register_subcommand(parser: ArgumentParser):
13 |         download_parser = parser.add_parser("download")
14 |         download_parser.add_argument(
15 |             "--cache-dir", type=str, default=None, help="Path to location to store the models"
16 |         )
17 |         download_parser.add_argument(
18 |             "--force", action="store_true", help="Force the model to be downloaded even if already in cache-dir"
19 |         )
20 |         download_parser.add_argument("model", type=str, help="Name of the model to download")
21 |         download_parser.set_defaults(func=download_command_factory)
22 |
23 |     def __init__(self, model: str, cache: str, force: bool):
24 |         self._model = model
25 |         self._cache = cache
26 |         self._force = force
27 |
28 |     def run(self):
29 |         from transformers import AutoModel, AutoTokenizer
30 |
31 |         AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
32 |         AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
33 |
-------------------------------------------------------------------------------- /src/transformers/commands/run.py: --------------------------------------------------------------------------------
1 | import logging
2 | from argparse import ArgumentParser
3 |
4 | from transformers.commands import BaseTransformersCLICommand
5 | from transformers.pipelines import SUPPORTED_TASKS, Pipeline, PipelineDataFormat, pipeline
6 |
7 |
8 | logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
9 |
10 |
11 | def try_infer_format_from_ext(path: str):
12 |     if not path:
13 |         return "pipe"
14 |
15 |     for ext in PipelineDataFormat.SUPPORTED_FORMATS:
16 |         if path.endswith(ext):
17 |             return ext
18 |
19 |     raise Exception(
20 |         "Unable to determine file format from file extension {}. 
" 21 | "Please provide the format through --format {}".format(path, PipelineDataFormat.SUPPORTED_FORMATS) 22 | ) 23 | 24 | 25 | def run_command_factory(args): 26 | nlp = pipeline( 27 | task=args.task, 28 | model=args.model if args.model else None, 29 | config=args.config, 30 | tokenizer=args.tokenizer, 31 | device=args.device, 32 | ) 33 | format = try_infer_format_from_ext(args.input) if args.format == "infer" else args.format 34 | reader = PipelineDataFormat.from_str( 35 | format=format, 36 | output_path=args.output, 37 | input_path=args.input, 38 | column=args.column if args.column else nlp.default_input_names, 39 | overwrite=args.overwrite, 40 | ) 41 | return RunCommand(nlp, reader) 42 | 43 | 44 | class RunCommand(BaseTransformersCLICommand): 45 | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat): 46 | self._nlp = nlp 47 | self._reader = reader 48 | 49 | @staticmethod 50 | def register_subcommand(parser: ArgumentParser): 51 | run_parser = parser.add_parser("run", help="Run a pipeline through the CLI") 52 | run_parser.add_argument("--task", choices=SUPPORTED_TASKS.keys(), help="Task to run") 53 | run_parser.add_argument("--input", type=str, help="Path to the file to use for inference") 54 | run_parser.add_argument("--output", type=str, help="Path to the file that will be used post to write results.") 55 | run_parser.add_argument("--model", type=str, help="Name or path to the model to instantiate.") 56 | run_parser.add_argument("--config", type=str, help="Name or path to the model's config to instantiate.") 57 | run_parser.add_argument( 58 | "--tokenizer", type=str, help="Name of the tokenizer to use. (default: same as the model name)" 59 | ) 60 | run_parser.add_argument( 61 | "--column", 62 | type=str, 63 | help="Name of the column to use as input. (For multi columns input as QA use column1,columns2)", 64 | ) 65 | run_parser.add_argument( 66 | "--format", 67 | type=str, 68 | default="infer", 69 | choices=PipelineDataFormat.SUPPORTED_FORMATS, 70 | help="Input format to read from", 71 | ) 72 | run_parser.add_argument( 73 | "--device", 74 | type=int, 75 | default=-1, 76 | help="Indicate the device to run onto, -1 indicates CPU, >= 0 indicates GPU (default: -1)", 77 | ) 78 | run_parser.add_argument("--overwrite", action="store_true", help="Allow overwriting the output file.") 79 | run_parser.set_defaults(func=run_command_factory) 80 | 81 | def run(self): 82 | nlp, outputs = self._nlp, [] 83 | 84 | for entry in self._reader: 85 | output = nlp(**entry) if self._reader.is_multi_columns else nlp(entry) 86 | if isinstance(output, dict): 87 | outputs.append(output) 88 | else: 89 | outputs += output 90 | 91 | # Saving data 92 | if self._nlp.binary_output: 93 | binary_path = self._reader.save_binary(outputs) 94 | logger.warning("Current pipeline requires output to be in binary format, saving at {}".format(binary_path)) 95 | else: 96 | self._reader.save(outputs) 97 | -------------------------------------------------------------------------------- /src/transformers/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", 28 | } 29 | 30 | 31 | class CamembertConfig(RobertaConfig): 32 | pretrained_config_archive_map = CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP 33 | -------------------------------------------------------------------------------- /src/transformers/configuration_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ DistilBERT model configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", 27 | "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-config.json", 28 | "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-config.json", 29 | "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-multilingual-cased-config.json", 30 | } 31 | 32 | 33 | class DistilBertConfig(PretrainedConfig): 34 | pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP 35 | 36 | def __init__( 37 | self, 38 | vocab_size=30522, 39 | max_position_embeddings=512, 40 | sinusoidal_pos_embds=False, 41 | n_layers=6, 42 | n_heads=12, 43 | dim=768, 44 | hidden_dim=4 * 768, 45 | dropout=0.1, 46 | attention_dropout=0.1, 47 | activation="gelu", 48 | initializer_range=0.02, 49 | tie_weights_=True, 50 | qa_dropout=0.1, 51 | seq_classif_dropout=0.2, 52 | **kwargs 53 | ): 54 | super(DistilBertConfig, self).__init__(**kwargs) 55 | self.vocab_size = vocab_size 56 | self.max_position_embeddings = max_position_embeddings 57 | self.sinusoidal_pos_embds = sinusoidal_pos_embds 58 | self.n_layers = n_layers 59 | self.n_heads = n_heads 60 | self.dim = dim 61 | self.hidden_dim = hidden_dim 62 | self.dropout = dropout 63 | self.attention_dropout = attention_dropout 64 | self.activation = activation 65 | self.initializer_range = initializer_range 66 | self.tie_weights_ = tie_weights_ 67 | self.qa_dropout = qa_dropout 68 | self.seq_classif_dropout = seq_classif_dropout 69 | 70 | @property 71 | def hidden_size(self): 72 | return self.dim 73 | 74 | @property 75 | def num_attention_heads(self): 76 | return self.n_heads 77 | 78 | @property 79 | def num_hidden_layers(self): 80 | return self.n_layers 81 | -------------------------------------------------------------------------------- /src/transformers/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ MMBT configuration """ 17 | 18 | 19 | import logging 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MMBTConfig(object): 26 | """Configuration class to store the configuration of a `MMBT Model`. 27 | 28 | Args: 29 | config: config of the underlying Transformer models. It's values are copied over to use a single config. 30 | num_labels: Size of final Linear layer for classification. 
31 | modal_hidden_size: Embedding dimension of the non-text modality encoder. 32 | """ 33 | 34 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 35 | self.__dict__ = config.__dict__ 36 | self.modal_hidden_size = modal_hidden_size 37 | if num_labels: 38 | self.num_labels = num_labels 39 | -------------------------------------------------------------------------------- /src/transformers/configuration_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ RoBERTa configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_bert import BertConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json", 28 | "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json", 29 | "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json", 30 | "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-config.json", 31 | "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-config.json", 32 | "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-config.json", 33 | } 34 | 35 | 36 | class RobertaConfig(BertConfig): 37 | pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP 38 | -------------------------------------------------------------------------------- /src/transformers/configuration_t5.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2010, The T5 Authors and HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ T5 model configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | T5_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json", 27 | "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json", 28 | "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json", 29 | "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-config.json", 30 | "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-config.json", 31 | } 32 | 33 | 34 | class T5Config(PretrainedConfig): 35 | r""" 36 | :class:`~transformers.T5Config` is the configuration class to store the configuration of a 37 | `T5Model`. 38 | 39 | 40 | Arguments: 41 | vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `T5Model`. 42 | hidden_size: Size of the encoder layers and the pooler layer. 43 | num_hidden_layers: Number of hidden layers in the Transformer encoder. 44 | num_attention_heads: Number of attention heads for each attention layer in 45 | the Transformer encoder. 46 | intermediate_size: The size of the "intermediate" (i.e., feed-forward) 47 | layer in the Transformer encoder. 48 | hidden_act: The non-linear activation function (function or string) in the 49 | encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported. 50 | hidden_dropout_prob: The dropout probabilitiy for all fully connected 51 | layers in the embeddings, encoder, and pooler. 52 | attention_probs_dropout_prob: The dropout ratio for the attention 53 | probabilities. 54 | max_position_embeddings: The maximum sequence length that this model might 55 | ever be used with. Typically set this to something large just in case 56 | (e.g., 512 or 1024 or 2048). 57 | type_vocab_size: The vocabulary size of the `token_type_ids` passed into 58 | `T5Model`. 59 | initializer_factor: A factor for initializing all weight matrices (should be kept to 1.0, used for initialization testing). 60 | layer_norm_eps: The epsilon used by LayerNorm. 
61 | """ 62 | pretrained_config_archive_map = T5_PRETRAINED_CONFIG_ARCHIVE_MAP 63 | 64 | def __init__( 65 | self, 66 | vocab_size=32128, 67 | n_positions=512, 68 | d_model=512, 69 | d_kv=64, 70 | d_ff=2048, 71 | num_layers=6, 72 | num_heads=8, 73 | relative_attention_num_buckets=32, 74 | dropout_rate=0.1, 75 | layer_norm_epsilon=1e-6, 76 | initializer_factor=1.0, 77 | **kwargs 78 | ): 79 | super(T5Config, self).__init__(**kwargs) 80 | self.vocab_size = vocab_size 81 | self.n_positions = n_positions 82 | self.d_model = d_model 83 | self.d_kv = d_kv 84 | self.d_ff = d_ff 85 | self.num_layers = num_layers 86 | self.num_heads = num_heads 87 | self.relative_attention_num_buckets = relative_attention_num_buckets 88 | self.dropout_rate = dropout_rate 89 | self.layer_norm_epsilon = layer_norm_epsilon 90 | self.initializer_factor = initializer_factor 91 | 92 | @property 93 | def max_position_embeddings(self): 94 | return self.n_positions 95 | 96 | @property 97 | def hidden_size(self): 98 | return self.d_model 99 | 100 | @property 101 | def num_attention_heads(self): 102 | return self.num_heads 103 | 104 | @property 105 | def num_hidden_layers(self): 106 | return self.num_layers 107 | -------------------------------------------------------------------------------- /src/transformers/configuration_xlm_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """ XLM-RoBERTa configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-config.json", 28 | "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-config.json", 29 | "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-config.json", 30 | "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-config.json", 31 | "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-config.json", 32 | "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-config.json", 33 | } 34 | 35 | 36 | class XLMRobertaConfig(RobertaConfig): 37 | pretrained_config_archive_map = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP 38 | -------------------------------------------------------------------------------- /src/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert ALBERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import AlbertConfig, AlbertForMaskedLM, load_tf_weights_in_albert 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = AlbertConfig.from_json_file(albert_config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = AlbertForMaskedLM(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_albert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--albert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained ALBERT model. 
\n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.albert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /src/transformers/convert_bert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert BERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = BertConfig.from_json_file(bert_config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = BertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_bert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--bert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained BERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.bert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /src/transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Convert OpenAI GPT-2 checkpoint."""
16 | 
17 | 
18 | import argparse
19 | import logging
20 | 
21 | import torch
22 | 
23 | from transformers import CONFIG_NAME, WEIGHTS_NAME, GPT2Config, GPT2Model, load_tf_weights_in_gpt2
24 | 
25 | 
26 | logging.basicConfig(level=logging.INFO)
27 | 
28 | 
29 | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, pytorch_dump_folder_path):
30 |     # Construct model
31 |     if gpt2_config_file == "":
32 |         config = GPT2Config()
33 |     else:
34 |         config = GPT2Config.from_json_file(gpt2_config_file)
35 |     model = GPT2Model(config)
36 | 
37 |     # Load weights from tf checkpoint
38 |     load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path)
39 | 
40 |     # Save pytorch-model
41 |     pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
42 |     pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
43 |     print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
44 |     torch.save(model.state_dict(), pytorch_weights_dump_path)
45 |     print("Save configuration file to {}".format(pytorch_config_dump_path))
46 |     with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
47 |         f.write(config.to_json_string())
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     parser = argparse.ArgumentParser()
52 |     # Required parameters
53 |     parser.add_argument(
54 |         "--gpt2_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint."
55 |     )
56 |     parser.add_argument(
57 |         "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
58 |     )
59 |     parser.add_argument(
60 |         "--gpt2_config_file",
61 |         default="",
62 |         type=str,
63 |         help="An optional config json file corresponding to the pre-trained OpenAI GPT-2 model. \n"
64 |         "This specifies the model architecture.",
65 |     )
66 |     args = parser.parse_args()
67 |     convert_gpt2_checkpoint_to_pytorch(args.gpt2_checkpoint_path, args.gpt2_config_file, args.pytorch_dump_folder_path)
68 | 
-------------------------------------------------------------------------------- /src/transformers/convert_openai_original_tf_checkpoint_to_pytorch.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Convert OpenAI GPT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import CONFIG_NAME, WEIGHTS_NAME, OpenAIGPTConfig, OpenAIGPTModel, load_tf_weights_in_openai_gpt 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_config_file, pytorch_dump_folder_path): 30 | # Construct model 31 | if openai_config_file == "": 32 | config = OpenAIGPTConfig() 33 | else: 34 | config = OpenAIGPTConfig.from_json_file(openai_config_file) 35 | model = OpenAIGPTModel(config) 36 | 37 | # Load weights from numpy 38 | load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path) 39 | 40 | # Save pytorch-model 41 | pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME 42 | pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME 43 | print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) 44 | torch.save(model.state_dict(), pytorch_weights_dump_path) 45 | print("Save configuration file to {}".format(pytorch_config_dump_path)) 46 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 47 | f.write(config.to_json_string()) 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | # Required parameters 53 | parser.add_argument( 54 | "--openai_checkpoint_folder_path", 55 | default=None, 56 | type=str, 57 | required=True, 58 | help="Path to the TensorFlow checkpoint path.", 59 | ) 60 | parser.add_argument( 61 | "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 62 | ) 63 | parser.add_argument( 64 | "--openai_config_file", 65 | default="", 66 | type=str, 67 | help="An optional config json file corresponding to the pre-trained OpenAI model. \n" 68 | "This specifies the model architecture.", 69 | ) 70 | args = parser.parse_args() 71 | convert_openai_checkpoint_to_pytorch( 72 | args.openai_checkpoint_folder_path, args.openai_config_file, args.pytorch_dump_folder_path 73 | ) 74 | -------------------------------------------------------------------------------- /src/transformers/convert_t5_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The T5 authors and HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert T5 checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import T5Config, T5Model, load_tf_weights_in_t5 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = T5Config.from_json_file(config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = T5Model(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_t5(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained T5 model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /src/transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert OpenAI GPT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import json 20 | import logging 21 | 22 | import numpy 23 | import torch 24 | 25 | from transformers import CONFIG_NAME, WEIGHTS_NAME 26 | from transformers.tokenization_xlm import VOCAB_FILES_NAMES 27 | 28 | 29 | logging.basicConfig(level=logging.INFO) 30 | 31 | 32 | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path): 33 | # Load checkpoint 34 | chkpt = torch.load(xlm_checkpoint_path, map_location="cpu") 35 | 36 | state_dict = chkpt["model"] 37 | 38 | # We have the base model one level deeper than the original XLM repository 39 | two_levels_state_dict = {} 40 | for k, v in state_dict.items(): 41 | if "pred_layer" in k: 42 | two_levels_state_dict[k] = v 43 | else: 44 | two_levels_state_dict["transformer." 
+ k] = v
45 | 
46 |     config = chkpt["params"]
47 |     config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
48 | 
49 |     vocab = chkpt["dico_word2id"]
50 |     vocab = dict((s + "</w>" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""), i) for s, i in vocab.items())
51 | 
52 |     # Save pytorch-model
53 |     pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
54 |     pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
55 |     pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["vocab_file"]
56 | 
57 |     print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
58 |     torch.save(two_levels_state_dict, pytorch_weights_dump_path)
59 | 
60 |     print("Save configuration file to {}".format(pytorch_config_dump_path))
61 |     with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
62 |         f.write(json.dumps(config, indent=2) + "\n")
63 | 
64 |     print("Save vocab file to {}".format(pytorch_vocab_dump_path))
65 |     with open(pytorch_vocab_dump_path, "w", encoding="utf-8") as f:
66 |         f.write(json.dumps(vocab, indent=2) + "\n")
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     parser = argparse.ArgumentParser()
71 |     # Required parameters
72 |     parser.add_argument(
73 |         "--xlm_checkpoint_path", default=None, type=str, required=True, help="Path to the official XLM PyTorch dump."
74 |     )
75 |     parser.add_argument(
76 |         "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
77 |     )
78 |     args = parser.parse_args()
79 |     convert_xlm_checkpoint_to_pytorch(args.xlm_checkpoint_path, args.pytorch_dump_folder_path)
80 | 
-------------------------------------------------------------------------------- /src/transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Convert BERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | import os 21 | 22 | import torch 23 | 24 | from transformers import ( 25 | CONFIG_NAME, 26 | WEIGHTS_NAME, 27 | XLNetConfig, 28 | XLNetForQuestionAnswering, 29 | XLNetForSequenceClassification, 30 | XLNetLMHeadModel, 31 | load_tf_weights_in_xlnet, 32 | ) 33 | 34 | 35 | GLUE_TASKS_NUM_LABELS = { 36 | "cola": 2, 37 | "mnli": 3, 38 | "mrpc": 2, 39 | "sst-2": 2, 40 | "sts-b": 1, 41 | "qqp": 2, 42 | "qnli": 2, 43 | "rte": 2, 44 | "wnli": 2, 45 | } 46 | 47 | 48 | logging.basicConfig(level=logging.INFO) 49 | 50 | 51 | def convert_xlnet_checkpoint_to_pytorch( 52 | tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None 53 | ): 54 | # Initialise PyTorch model 55 | config = XLNetConfig.from_json_file(bert_config_file) 56 | 57 | finetuning_task = finetuning_task.lower() if finetuning_task is not None else "" 58 | if finetuning_task in GLUE_TASKS_NUM_LABELS: 59 | print("Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config))) 60 | config.finetuning_task = finetuning_task 61 | config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task] 62 | model = XLNetForSequenceClassification(config) 63 | elif "squad" in finetuning_task: 64 | config.finetuning_task = finetuning_task 65 | model = XLNetForQuestionAnswering(config) 66 | else: 67 | model = XLNetLMHeadModel(config) 68 | 69 | # Load weights from tf checkpoint 70 | load_tf_weights_in_xlnet(model, config, tf_checkpoint_path) 71 | 72 | # Save pytorch-model 73 | pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) 74 | pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME) 75 | print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) 76 | torch.save(model.state_dict(), pytorch_weights_dump_path) 77 | print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) 78 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 79 | f.write(config.to_json_string()) 80 | 81 | 82 | if __name__ == "__main__": 83 | parser = argparse.ArgumentParser() 84 | # Required parameters 85 | parser.add_argument( 86 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 87 | ) 88 | parser.add_argument( 89 | "--xlnet_config_file", 90 | default=None, 91 | type=str, 92 | required=True, 93 | help="The config json file corresponding to the pre-trained XLNet model. \n" 94 | "This specifies the model architecture.", 95 | ) 96 | parser.add_argument( 97 | "--pytorch_dump_folder_path", 98 | default=None, 99 | type=str, 100 | required=True, 101 | help="Path to the folder to store the PyTorch model or dataset/vocab.", 102 | ) 103 | parser.add_argument( 104 | "--finetuning_task", 105 | default=None, 106 | type=str, 107 | help="Name of a task on which the XLNet TensorFloaw model was fine-tuned", 108 | ) 109 | args = parser.parse_args() 110 | print(args) 111 | 112 | convert_xlnet_checkpoint_to_pytorch( 113 | args.tf_checkpoint_path, args.xlnet_config_file, args.pytorch_dump_folder_path, args.finetuning_task 114 | ) 115 | -------------------------------------------------------------------------------- /src/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. 
So, don't check this module at all. 4 | 5 | from .metrics import is_sklearn_available 6 | from .processors import ( 7 | DataProcessor, 8 | InputExample, 9 | InputFeatures, 10 | SingleSentenceClassificationProcessor, 11 | SquadExample, 12 | SquadFeatures, 13 | SquadV1Processor, 14 | SquadV2Processor, 15 | glue_convert_examples_to_features, 16 | glue_output_modes, 17 | glue_processors, 18 | glue_tasks_num_labels, 19 | squad_convert_examples_to_features, 20 | xnli_output_modes, 21 | xnli_processors, 22 | xnli_tasks_num_labels, 23 | ) 24 | 25 | 26 | if is_sklearn_available(): 27 | from .metrics import glue_compute_metrics, xnli_compute_metrics 28 | -------------------------------------------------------------------------------- /src/transformers/data/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | try: 18 | from scipy.stats import pearsonr, spearmanr 19 | from sklearn.metrics import matthews_corrcoef, f1_score 20 | 21 | _has_sklearn = True 22 | except (AttributeError, ImportError): 23 | _has_sklearn = False 24 | 25 | 26 | def is_sklearn_available(): 27 | return _has_sklearn 28 | 29 | 30 | if _has_sklearn: 31 | 32 | def simple_accuracy(preds, labels): 33 | return (preds == labels).mean() 34 | 35 | def acc_and_f1(preds, labels): 36 | acc = simple_accuracy(preds, labels) 37 | f1 = f1_score(y_true=labels, y_pred=preds) 38 | return { 39 | "acc": acc, 40 | "f1": f1, 41 | "acc_and_f1": (acc + f1) / 2, 42 | } 43 | 44 | def pearson_and_spearman(preds, labels): 45 | pearson_corr = pearsonr(preds, labels)[0] 46 | spearman_corr = spearmanr(preds, labels)[0] 47 | return { 48 | "pearson": pearson_corr, 49 | "spearmanr": spearman_corr, 50 | "corr": (pearson_corr + spearman_corr) / 2, 51 | } 52 | 53 | def glue_compute_metrics(task_name, preds, labels): 54 | assert len(preds) == len(labels) 55 | if task_name == "cola": 56 | return {"mcc": matthews_corrcoef(labels, preds)} 57 | elif task_name == "sst-2": 58 | return {"acc": simple_accuracy(preds, labels)} 59 | elif task_name == "mrpc": 60 | return acc_and_f1(preds, labels) 61 | elif task_name == "sts-b": 62 | return pearson_and_spearman(preds, labels) 63 | elif task_name == "qqp": 64 | return acc_and_f1(preds, labels) 65 | elif task_name == "mnli": 66 | return {"acc": simple_accuracy(preds, labels)} 67 | elif task_name == "mnli-mm": 68 | return {"acc": simple_accuracy(preds, labels)} 69 | elif task_name == "qnli": 70 | return {"acc": simple_accuracy(preds, labels)} 71 | elif task_name == "rte": 72 | return {"acc": simple_accuracy(preds, labels)} 73 | elif task_name == "wnli": 74 | return {"acc": simple_accuracy(preds, labels)} 75 | else: 76 | raise KeyError(task_name) 77 | 78 | def xnli_compute_metrics(task_name, preds, labels): 79 | assert 
len(preds) == len(labels) 80 | if task_name == "xnli": 81 | return {"acc": simple_accuracy(preds, labels)} 82 | else: 83 | raise KeyError(task_name) 84 | -------------------------------------------------------------------------------- /src/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 9 | -------------------------------------------------------------------------------- /src/transformers/data/processors/xnli.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ XNLI utils (dataset loading and evaluation) """ 17 | 18 | 19 | import logging 20 | import os 21 | 22 | from .utils import DataProcessor, InputExample 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class XnliProcessor(DataProcessor): 29 | """Processor for the XNLI dataset. 
30 | Adapted from https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/run_classifier.py#L207""" 31 | 32 | def __init__(self, language, train_language=None): 33 | self.language = language 34 | self.train_language = train_language 35 | 36 | def get_train_examples(self, data_dir): 37 | """See base class.""" 38 | lg = self.language if self.train_language is None else self.train_language 39 | lines = self._read_tsv(os.path.join(data_dir, "XNLI-MT-1.0/multinli/multinli.train.{}.tsv".format(lg))) 40 | examples = [] 41 | for (i, line) in enumerate(lines): 42 | if i == 0: 43 | continue 44 | guid = "%s-%s" % ("train", i) 45 | text_a = line[0] 46 | text_b = line[1] 47 | label = "contradiction" if line[2] == "contradictory" else line[2] 48 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 49 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 50 | return examples 51 | 52 | def get_test_examples(self, data_dir): 53 | """See base class.""" 54 | lines = self._read_tsv(os.path.join(data_dir, "XNLI-1.0/xnli.test.tsv")) 55 | examples = [] 56 | for (i, line) in enumerate(lines): 57 | if i == 0: 58 | continue 59 | language = line[0] 60 | if language != self.language: 61 | continue 62 | guid = "%s-%s" % ("test", i) 63 | text_a = line[6] 64 | text_b = line[7] 65 | label = line[1] 66 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 67 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 68 | return examples 69 | 70 | def get_labels(self): 71 | """See base class.""" 72 | return ["contradiction", "entailment", "neutral"] 73 | 74 | 75 | xnli_processors = { 76 | "xnli": XnliProcessor, 77 | } 78 | 79 | xnli_output_modes = { 80 | "xnli": "classification", 81 | } 82 | 83 | xnli_tasks_num_labels = { 84 | "xnli": 3, 85 | } 86 | -------------------------------------------------------------------------------- /src/transformers/tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for DistilBERT.""" 16 | 17 | 18 | import logging 19 | 20 | from .tokenization_bert import BertTokenizer 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 26 | 27 | PRETRAINED_VOCAB_FILES_MAP = { 28 | "vocab_file": { 29 | "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", 30 | "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", 31 | "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-vocab.txt", 32 | "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", 33 | } 34 | } 35 | 36 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 37 | "distilbert-base-uncased": 512, 38 | "distilbert-base-uncased-distilled-squad": 512, 39 | "distilbert-base-german-cased": 512, 40 | "distilbert-base-multilingual-cased": 512, 41 | } 42 | 43 | 44 | PRETRAINED_INIT_CONFIGURATION = { 45 | "distilbert-base-uncased": {"do_lower_case": True}, 46 | "distilbert-base-uncased-distilled-squad": {"do_lower_case": True}, 47 | "distilbert-base-german-cased": {"do_lower_case": False}, 48 | "distilbert-base-multilingual-cased": {"do_lower_case": False}, 49 | } 50 | 51 | 52 | class DistilBertTokenizer(BertTokenizer): 53 | r""" 54 | Constructs a DistilBertTokenizer. 55 | :class:`~transformers.DistilBertTokenizer` is identical to BertTokenizer and runs end-to-end tokenization: punctuation splitting + wordpiece 56 | 57 | Args: 58 | vocab_file: Path to a one-wordpiece-per-line vocabulary file 59 | do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True 60 | do_basic_tokenize: Whether to do basic tokenization before wordpiece. 61 | max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the 62 | minimum of this value (if specified) and the underlying BERT model's sequence length. 63 | never_split: List of tokens which will never be split during tokenization. Only has an effect when 64 | do_basic_tokenize=True 65 | """ 66 | 67 | vocab_files_names = VOCAB_FILES_NAMES 68 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 69 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 70 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 71 | -------------------------------------------------------------------------------- /templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | # How to add a new example script in 🤗Transformers 2 | 3 | This folder provide a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library. 4 | 5 | Currently only examples for PyTorch are provided which are adaptations of the library's SQuAD examples which implement single-GPU and distributed training with gradient accumulation and mixed-precision (using NVIDIA's apex library) to cover a reasonable range of use cases. 
6 | 
-------------------------------------------------------------------------------- /templates/adding_a_new_model/README.md: --------------------------------------------------------------------------------
1 | # How to add a new model in 🤗Transformers
2 | 
3 | This folder describes the process to add a new model in 🤗Transformers and provides templates for the required files.
4 | 
5 | The library is designed to incorporate a variety of models and code bases. As such, the process for adding a new model usually consists mostly of copy-pasting the relevant original code into the various sections of the templates included in the present repository.
6 | 
7 | One important point, though, is that the library has the following goals impacting the way models are incorporated:
8 | 
9 | - one specific feature of the API is the capability to run the model and tokenizer inline. The tokenization code thus often has to be slightly adapted to allow for running in the python interpreter.
10 | - the package is also designed to be as self-consistent as possible, with a small and reliable set of package dependencies. As a consequence, additional dependencies are usually not allowed when adding a model but can be allowed for the inclusion of a new tokenizer (recent examples of dependencies added for tokenizer specificities include `sentencepiece` and `sacremoses`). Please make sure to check the existing dependencies when possible before adding a new one.
11 | 
12 | For a quick overview of the library organization, please check the [QuickStart section of the documentation](https://huggingface.co/transformers/quickstart.html).
13 | 
14 | # Typical workflow for including a model
15 | 
16 | Here is an overview of the general workflow:
17 | 
18 | - [ ] add model/configuration/tokenization classes
19 | - [ ] add conversion scripts
20 | - [ ] add tests
21 | - [ ] finalize
22 | 
23 | Let's detail what should be done at each step.
24 | 
25 | ## Adding model/configuration/tokenization classes
26 | 
27 | Here is the workflow for adding model/configuration/tokenization classes:
28 | 
29 | - [ ] copy the python files from the present folder to the main folder and rename them, replacing `xxx` with your model name,
30 | - [ ] edit the files to replace `XXX` (with various casing) with your model name
31 | - [ ] copy-paste or create a simple configuration class for your model in the `configuration_...` file
32 | - [ ] copy-paste or create the code for your model in the `modeling_...` files (PyTorch and TF 2.0)
33 | - [ ] copy-paste or create a tokenizer class for your model in the `tokenization_...` file
34 | 
35 | ## Adding conversion scripts
36 | 
37 | Here is the workflow for the conversion scripts:
38 | 
39 | - [ ] copy the conversion script (`convert_...`) from the present folder to the main folder.
40 | - [ ] edit this script to convert your original checkpoint weights to the current PyTorch ones (see the sketch below).
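
The template's conversion entry point is a plain function, so once the script is renamed for your model it can be driven from Python as well as from the command line. A minimal sketch (illustrative; the paths are placeholders, and the import assumes you run from the directory holding the renamed script; the signature comes from `convert_xxx_original_tf_checkpoint_to_pytorch.py` below):

```python
from convert_xxx_original_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch

# Convert an original TF checkpoint into a PyTorch state dict saved on disk.
convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path="/path/to/original_tf_checkpoint",
    config_file="/path/to/config.json",
    pytorch_dump_path="/path/to/pytorch_model.bin",
)
```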
41 | 
42 | ## Adding tests
43 | 
44 | Here is the workflow for adding tests:
45 | 
46 | - [ ] copy the python files from the `tests` sub-folder of the present folder to the `tests` subfolder of the main folder and rename them, replacing `xxx` with your model name,
47 | - [ ] edit the tests files to replace `XXX` (with various casing) with your model name
48 | - [ ] edit the tests code as needed
49 | 
50 | ## Final steps
51 | 
52 | You can then finish the addition step by adding imports for your classes in the common files:
53 | 
54 | - [ ] add import for all the relevant classes in `__init__.py`
55 | - [ ] add your configuration in `configuration_auto.py`
56 | - [ ] add your PyTorch and TF 2.0 model respectively in `modeling_auto.py` and `modeling_tf_auto.py`
57 | - [ ] add your tokenizer in `tokenization_auto.py`
58 | - [ ] add your models and tokenizer to `pipelines.py`
59 | - [ ] add a link to your conversion script in the main conversion utility (in `commands/convert.py`)
60 | - [ ] edit the PyTorch to TF 2.0 conversion script to add your model in the `convert_pytorch_checkpoint_to_tf2.py` file
61 | - [ ] add a mention of your model in the doc: `README.md` and the documentation itself at `docs/source/pretrained_models.rst`.
62 | - [ ] upload the pretrained weights, configurations and vocabulary files.
63 | 
-------------------------------------------------------------------------------- /templates/adding_a_new_model/convert_xxx_original_tf_checkpoint_to_pytorch.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Convert XXX checkpoint."""
16 | 
17 | 
18 | import argparse
19 | import logging
20 | 
21 | import torch
22 | 
23 | from transformers import XxxConfig, XxxForPreTraining, load_tf_weights_in_xxx
24 | 
25 | 
26 | logging.basicConfig(level=logging.INFO)
27 | 
28 | 
29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
30 |     # Initialise PyTorch model
31 |     config = XxxConfig.from_json_file(config_file)
32 |     print("Building PyTorch model from configuration: {}".format(str(config)))
33 |     model = XxxForPreTraining(config)
34 | 
35 |     # Load weights from tf checkpoint
36 |     load_tf_weights_in_xxx(model, config, tf_checkpoint_path)
37 | 
38 |     # Save pytorch-model
39 |     print("Save PyTorch model to {}".format(pytorch_dump_path))
40 |     torch.save(model.state_dict(), pytorch_dump_path)
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     parser = argparse.ArgumentParser()
45 |     # Required parameters
46 |     parser.add_argument(
47 |         "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint."
48 |     )
49 |     parser.add_argument(
50 |         "--config_file",
51 |         default=None,
52 |         type=str,
53 |         required=True,
54 |         help="The config json file corresponding to the pre-trained model. 
\n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /templates/adding_a_new_model/tests/test_tokenization_xxx.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 XXX Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import unittest 19 | 20 | from transformers.tokenization_bert import VOCAB_FILES_NAMES, XxxTokenizer 21 | 22 | from .test_tokenization_common import TokenizerTesterMixin 23 | 24 | 25 | class XxxTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 26 | 27 | tokenizer_class = XxxTokenizer 28 | 29 | def setUp(self): 30 | super(XxxTokenizationTest, self).setUp() 31 | 32 | vocab_tokens = [ 33 | "[UNK]", 34 | "[CLS]", 35 | "[SEP]", 36 | "want", 37 | "##want", 38 | "##ed", 39 | "wa", 40 | "un", 41 | "runn", 42 | "##ing", 43 | ",", 44 | "low", 45 | "lowest", 46 | ] 47 | self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) 48 | with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: 49 | vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) 50 | 51 | def get_tokenizer(self, **kwargs): 52 | return XxxTokenizer.from_pretrained(self.tmpdirname, **kwargs) 53 | 54 | def get_input_output_texts(self): 55 | input_text = "UNwant\u00E9d,running" 56 | output_text = "unwanted, running" 57 | return input_text, output_text 58 | 59 | def test_full_tokenizer(self): 60 | tokenizer = self.tokenizer_class(self.vocab_file) 61 | 62 | tokens = tokenizer.tokenize("UNwant\u00E9d,running") 63 | self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) 64 | self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) 65 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/tests/fixtures/empty.txt -------------------------------------------------------------------------------- /tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /tests/fixtures/sample_text.txt: -------------------------------------------------------------------------------- 1 | This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত 2 | Text should be one-sentence-per-line, with empty lines between documents. 3 | This sample text is public domain and was randomly selected from Project Guttenberg. 4 | 5 | The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors. 6 | Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity. 7 | Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them. 8 | "Cass" Beard had risen early that morning, but not with a view to discovery. 9 | A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets. 10 | The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency. 11 | This was nearly opposite. 12 | Mr. Cassius crossed the highway, and stopped suddenly. 13 | Something glittered in the nearest red pool before him. 14 | Gold, surely! 15 | But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring. 16 | Looking at it more attentively, he saw that it bore the inscription, "May to Cass." 17 | Like most of his fellow gold-seekers, Cass was superstitious. 18 | 19 | The fountain of classic wisdom, Hypatia herself. 20 | As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge. 21 | From my youth I felt in me a soul above the matter-entangled herd. 22 | She revealed to me the glorious fact, that I am a spark of Divinity itself. 23 | A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's. 24 | There is a philosophic pleasure in opening one's treasures to the modest young. 25 | Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street. 
26 | Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide; 27 | but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind. 28 | Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now. 29 | His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert; 30 | while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts. 31 | At last they reached the quay at the opposite end of the street; 32 | and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers. 33 | He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him. 34 | -------------------------------------------------------------------------------- /tests/fixtures/spiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/tests/fixtures/spiece.model -------------------------------------------------------------------------------- /tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erenup/pytorch-transformers/896a0eb1fd861bc37097a9b669ebf4cb8d523de7/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /tests/test_configuration_common.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import json 18 | import os 19 | import tempfile 20 | 21 | 22 | class ConfigTester(object): 23 | def __init__(self, parent, config_class=None, **kwargs): 24 | self.parent = parent 25 | self.config_class = config_class 26 | self.inputs_dict = kwargs 27 | 28 | def create_and_test_config_common_properties(self): 29 | config = self.config_class(**self.inputs_dict) 30 | self.parent.assertTrue(hasattr(config, "vocab_size")) 31 | self.parent.assertTrue(hasattr(config, "hidden_size")) 32 | self.parent.assertTrue(hasattr(config, "num_attention_heads")) 33 | self.parent.assertTrue(hasattr(config, "num_hidden_layers")) 34 | 35 | def create_and_test_config_to_json_string(self): 36 | config = self.config_class(**self.inputs_dict) 37 | obj = json.loads(config.to_json_string()) 38 | for key, value in self.inputs_dict.items(): 39 | self.parent.assertEqual(obj[key], value) 40 | 41 | def create_and_test_config_to_json_file(self): 42 | config_first = self.config_class(**self.inputs_dict) 43 | 44 | with tempfile.TemporaryDirectory() as tmpdirname: 45 | json_file_path = os.path.join(tmpdirname, "config.json") 46 | config_first.to_json_file(json_file_path) 47 | config_second = self.config_class.from_json_file(json_file_path) 48 | 49 | self.parent.assertEqual(config_second.to_dict(), config_first.to_dict()) 50 | 51 | def create_and_test_config_from_and_save_pretrained(self): 52 | config_first = self.config_class(**self.inputs_dict) 53 | 54 | with tempfile.TemporaryDirectory() as tmpdirname: 55 | config_first.save_pretrained(tmpdirname) 56 | config_second = self.config_class.from_pretrained(tmpdirname) 57 | 58 | self.parent.assertEqual(config_second.to_dict(), config_first.to_dict()) 59 | 60 | def run_common_tests(self): 61 | self.create_and_test_config_common_properties() 62 | self.create_and_test_config_to_json_string() 63 | self.create_and_test_config_to_json_file() 64 | self.create_and_test_config_from_and_save_pretrained() 65 | -------------------------------------------------------------------------------- /tests/test_hf_api.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import os 18 | import time 19 | import unittest 20 | 21 | import requests 22 | from requests.exceptions import HTTPError 23 | 24 | from transformers.hf_api import HfApi, HfFolder, PresignedUrl, S3Obj 25 | 26 | 27 | USER = "__DUMMY_TRANSFORMERS_USER__" 28 | PASS = "__DUMMY_TRANSFORMERS_PASS__" 29 | FILES = [ 30 | ( 31 | "Test-{}.txt".format(int(time.time())), 32 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/input.txt"), 33 | ), 34 | ( 35 | "yoyo {}.txt".format(int(time.time())), # space is intentional 36 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/empty.txt"), 37 | ), 38 | ] 39 | 40 | 41 | class HfApiCommonTest(unittest.TestCase): 42 | _api = HfApi(endpoint="https://moon-staging.huggingface.co") 43 | 44 | 45 | class HfApiLoginTest(HfApiCommonTest): 46 | def test_login_invalid(self): 47 | with self.assertRaises(HTTPError): 48 | self._api.login(username=USER, password="fake") 49 | 50 | def test_login_valid(self): 51 | token = self._api.login(username=USER, password=PASS) 52 | self.assertIsInstance(token, str) 53 | 54 | 55 | class HfApiEndpointsTest(HfApiCommonTest): 56 | @classmethod 57 | def setUpClass(cls): 58 | """ 59 | Share this valid token in all tests below. 60 | """ 61 | cls._token = cls._api.login(username=USER, password=PASS) 62 | 63 | @classmethod 64 | def tearDownClass(cls): 65 | for FILE_KEY, FILE_PATH in FILES: 66 | cls._api.delete_obj(token=cls._token, filename=FILE_KEY) 67 | 68 | def test_whoami(self): 69 | user = self._api.whoami(token=self._token) 70 | self.assertEqual(user, USER) 71 | 72 | def test_presign(self): 73 | for FILE_KEY, FILE_PATH in FILES: 74 | urls = self._api.presign(token=self._token, filename=FILE_KEY) 75 | self.assertIsInstance(urls, PresignedUrl) 76 | self.assertEqual(urls.type, "text/plain") 77 | 78 | def test_presign_and_upload(self): 79 | for FILE_KEY, FILE_PATH in FILES: 80 | access_url = self._api.presign_and_upload(token=self._token, filename=FILE_KEY, filepath=FILE_PATH) 81 | self.assertIsInstance(access_url, str) 82 | with open(FILE_PATH, "r") as f: 83 | body = f.read() 84 | r = requests.get(access_url) 85 | self.assertEqual(r.text, body) 86 | 87 | def test_list_objs(self): 88 | objs = self._api.list_objs(token=self._token) 89 | self.assertIsInstance(objs, list) 90 | if len(objs) > 0: 91 | o = objs[-1] 92 | self.assertIsInstance(o, S3Obj) 93 | 94 | 95 | class HfFolderTest(unittest.TestCase): 96 | def test_token_workflow(self): 97 | """ 98 | Test the whole token save/get/delete workflow, 99 | with the desired behavior with respect to non-existent tokens. 100 | """ 101 | token = "token-{}".format(int(time.time())) 102 | HfFolder.save_token(token) 103 | self.assertEqual(HfFolder.get_token(), token) 104 | HfFolder.delete_token() 105 | HfFolder.delete_token() 106 | # ^^ not an error, we test that the 107 | # second call does not fail. 108 | self.assertEqual(HfFolder.get_token(), None) 109 | -------------------------------------------------------------------------------- /tests/test_model_card.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import json 18 | import os 19 | import tempfile 20 | import unittest 21 | 22 | from transformers.modelcard import ModelCard 23 | 24 | 25 | class ModelCardTester(unittest.TestCase): 26 | def setUp(self): 27 | self.inputs_dict = { 28 | "model_details": { 29 | "Organization": "testing", 30 | "Model date": "today", 31 | "Model version": "v2.1, Developed by Test Corp in 2019.", 32 | "Architecture": "Convolutional Neural Network.", 33 | }, 34 | "metrics": "BLEU and ROUGE-1", 35 | "evaluation_data": { 36 | "Datasets": {"BLEU": "My-great-dataset-v1", "ROUGE-1": "My-short-dataset-v2.1"}, 37 | "Preprocessing": "See details on https://arxiv.org/pdf/1810.03993.pdf", 38 | }, 39 | "training_data": { 40 | "Dataset": "English Wikipedia dump dated 2018-12-01", 41 | "Preprocessing": "Using SentencePiece vocabulary of size 52k tokens. See details on https://arxiv.org/pdf/1810.03993.pdf", 42 | }, 43 | "quantitative_analyses": {"BLEU": 55.1, "ROUGE-1": 76}, 44 | } 45 | 46 | def test_model_card_common_properties(self): 47 | modelcard = ModelCard.from_dict(self.inputs_dict) 48 | self.assertTrue(hasattr(modelcard, "model_details")) 49 | self.assertTrue(hasattr(modelcard, "intended_use")) 50 | self.assertTrue(hasattr(modelcard, "factors")) 51 | self.assertTrue(hasattr(modelcard, "metrics")) 52 | self.assertTrue(hasattr(modelcard, "evaluation_data")) 53 | self.assertTrue(hasattr(modelcard, "training_data")) 54 | self.assertTrue(hasattr(modelcard, "quantitative_analyses")) 55 | self.assertTrue(hasattr(modelcard, "ethical_considerations")) 56 | self.assertTrue(hasattr(modelcard, "caveats_and_recommendations")) 57 | 58 | def test_model_card_to_json_string(self): 59 | modelcard = ModelCard.from_dict(self.inputs_dict) 60 | obj = json.loads(modelcard.to_json_string()) 61 | for key, value in self.inputs_dict.items(): 62 | self.assertEqual(obj[key], value) 63 | 64 | def test_model_card_to_json_file(self): 65 | model_card_first = ModelCard.from_dict(self.inputs_dict) 66 | 67 | with tempfile.TemporaryDirectory() as tmpdirname: 68 | filename = os.path.join(tmpdirname, "modelcard.json") 69 | model_card_first.to_json_file(filename) 70 | model_card_second = ModelCard.from_json_file(filename) 71 | 72 | self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict()) 73 | 74 | def test_model_card_from_and_save_pretrained(self): 75 | model_card_first = ModelCard.from_dict(self.inputs_dict) 76 | 77 | with tempfile.TemporaryDirectory() as tmpdirname: 78 | model_card_first.save_pretrained(tmpdirname) 79 | model_card_second = ModelCard.from_pretrained(tmpdirname) 80 | 81 | self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict()) 82 | -------------------------------------------------------------------------------- /tests/test_modeling_auto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import logging 18 | import unittest 19 | 20 | from transformers import is_torch_available 21 | 22 | from .utils import SMALL_MODEL_IDENTIFIER, require_torch, slow 23 | 24 | 25 | if is_torch_available(): 26 | from transformers import ( 27 | AutoConfig, 28 | BertConfig, 29 | AutoModel, 30 | BertModel, 31 | AutoModelWithLMHead, 32 | BertForMaskedLM, 33 | AutoModelForSequenceClassification, 34 | BertForSequenceClassification, 35 | AutoModelForQuestionAnswering, 36 | BertForQuestionAnswering, 37 | ) 38 | from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP 39 | 40 | 41 | @require_torch 42 | class AutoModelTest(unittest.TestCase): 43 | @slow 44 | def test_model_from_pretrained(self): 45 | logging.basicConfig(level=logging.INFO) 46 | for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 47 | config = AutoConfig.from_pretrained(model_name) 48 | self.assertIsNotNone(config) 49 | self.assertIsInstance(config, BertConfig) 50 | 51 | model = AutoModel.from_pretrained(model_name) 52 | model, loading_info = AutoModel.from_pretrained(model_name, output_loading_info=True) 53 | self.assertIsNotNone(model) 54 | self.assertIsInstance(model, BertModel) 55 | for value in loading_info.values(): 56 | self.assertEqual(len(value), 0) 57 | 58 | @slow 59 | def test_lmhead_model_from_pretrained(self): 60 | logging.basicConfig(level=logging.INFO) 61 | for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 62 | config = AutoConfig.from_pretrained(model_name) 63 | self.assertIsNotNone(config) 64 | self.assertIsInstance(config, BertConfig) 65 | 66 | model = AutoModelWithLMHead.from_pretrained(model_name) 67 | model, loading_info = AutoModelWithLMHead.from_pretrained(model_name, output_loading_info=True) 68 | self.assertIsNotNone(model) 69 | self.assertIsInstance(model, BertForMaskedLM) 70 | 71 | @slow 72 | def test_sequence_classification_model_from_pretrained(self): 73 | logging.basicConfig(level=logging.INFO) 74 | for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 75 | config = AutoConfig.from_pretrained(model_name) 76 | self.assertIsNotNone(config) 77 | self.assertIsInstance(config, BertConfig) 78 | 79 | model = AutoModelForSequenceClassification.from_pretrained(model_name) 80 | model, loading_info = AutoModelForSequenceClassification.from_pretrained( 81 | model_name, output_loading_info=True 82 | ) 83 | self.assertIsNotNone(model) 84 | self.assertIsInstance(model, BertForSequenceClassification) 85 | 86 | @slow 87 | def test_question_answering_model_from_pretrained(self): 88 | logging.basicConfig(level=logging.INFO) 89 | for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 90 | config = AutoConfig.from_pretrained(model_name) 91 | self.assertIsNotNone(config) 92 | self.assertIsInstance(config, BertConfig) 93 | 94 | model = AutoModelForQuestionAnswering.from_pretrained(model_name) 95 | model, loading_info = AutoModelForQuestionAnswering.from_pretrained(model_name, output_loading_info=True) 96 | self.assertIsNotNone(model) 97 | self.assertIsInstance(model, BertForQuestionAnswering) 
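# For reference, a minimal sketch of the loading pattern the tests above
# exercise (the checkpoint id is illustrative; any BERT checkpoint id from the
# archive map behaves the same way):
#
#     config = AutoConfig.from_pretrained("bert-base-cased")
#     model = AutoModel.from_pretrained("bert-base-cased")
#     model, info = AutoModel.from_pretrained("bert-base-cased", output_loading_info=True)
#     # info collects "missing_keys", "unexpected_keys" and "error_msgs" lists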
98 | 99 | def test_from_pretrained_identifier(self): 100 | logging.basicConfig(level=logging.INFO) 101 | model = AutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER) 102 | self.assertIsInstance(model, BertForMaskedLM) 103 | -------------------------------------------------------------------------------- /tests/test_modeling_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Hugging Face Inc. Team 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import logging 17 | import unittest 18 | 19 | from transformers import is_torch_available 20 | 21 | from .utils import require_torch, slow 22 | 23 | 24 | if is_torch_available(): 25 | from transformers import BertModel, BertForMaskedLM, Model2Model 26 | from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP 27 | 28 | 29 | @require_torch 30 | class EncoderDecoderModelTest(unittest.TestCase): 31 | @slow 32 | def test_model2model_from_pretrained(self): 33 | logging.basicConfig(level=logging.INFO) 34 | for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 35 | model = Model2Model.from_pretrained(model_name) 36 | self.assertIsInstance(model.encoder, BertModel) 37 | self.assertIsInstance(model.decoder, BertForMaskedLM) 38 | self.assertEqual(model.decoder.config.is_decoder, True) 39 | self.assertEqual(model.encoder.config.is_decoder, False) 40 | 41 | def test_model2model_from_pretrained_not_bert(self): 42 | logging.basicConfig(level=logging.INFO) 43 | with self.assertRaises(ValueError): 44 | _ = Model2Model.from_pretrained("roberta") 45 | 46 | with self.assertRaises(ValueError): 47 | _ = Model2Model.from_pretrained("distilbert") 48 | 49 | with self.assertRaises(ValueError): 50 | _ = Model2Model.from_pretrained("does-not-exist") 51 | -------------------------------------------------------------------------------- /tests/test_modeling_tf_auto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import logging 18 | import unittest 19 | 20 | from transformers import is_tf_available 21 | 22 | from .utils import SMALL_MODEL_IDENTIFIER, require_tf, slow 23 | 24 | 25 | if is_tf_available(): 26 | from transformers import ( 27 | AutoConfig, 28 | BertConfig, 29 | TFAutoModel, 30 | TFBertModel, 31 | TFAutoModelWithLMHead, 32 | TFBertForMaskedLM, 33 | TFAutoModelForSequenceClassification, 34 | TFBertForSequenceClassification, 35 | TFAutoModelForQuestionAnswering, 36 | TFBertForQuestionAnswering, 37 | ) 38 | 39 | 40 | @require_tf 41 | class TFAutoModelTest(unittest.TestCase): 42 | @slow 43 | def test_model_from_pretrained(self): 44 | import h5py 45 | 46 | self.assertTrue(h5py.version.hdf5_version.startswith("1.10")) 47 | 48 | logging.basicConfig(level=logging.INFO) 49 | # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 50 | for model_name in ["bert-base-uncased"]: 51 | config = AutoConfig.from_pretrained(model_name) 52 | self.assertIsNotNone(config) 53 | self.assertIsInstance(config, BertConfig) 54 | 55 | model = TFAutoModel.from_pretrained(model_name) 56 | self.assertIsNotNone(model) 57 | self.assertIsInstance(model, TFBertModel) 58 | 59 | @slow 60 | def test_lmhead_model_from_pretrained(self): 61 | logging.basicConfig(level=logging.INFO) 62 | # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 63 | for model_name in ["bert-base-uncased"]: 64 | config = AutoConfig.from_pretrained(model_name) 65 | self.assertIsNotNone(config) 66 | self.assertIsInstance(config, BertConfig) 67 | 68 | model = TFAutoModelWithLMHead.from_pretrained(model_name) 69 | self.assertIsNotNone(model) 70 | self.assertIsInstance(model, TFBertForMaskedLM) 71 | 72 | @slow 73 | def test_sequence_classification_model_from_pretrained(self): 74 | logging.basicConfig(level=logging.INFO) 75 | # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 76 | for model_name in ["bert-base-uncased"]: 77 | config = AutoConfig.from_pretrained(model_name) 78 | self.assertIsNotNone(config) 79 | self.assertIsInstance(config, BertConfig) 80 | 81 | model = TFAutoModelForSequenceClassification.from_pretrained(model_name) 82 | self.assertIsNotNone(model) 83 | self.assertIsInstance(model, TFBertForSequenceClassification) 84 | 85 | @slow 86 | def test_question_answering_model_from_pretrained(self): 87 | logging.basicConfig(level=logging.INFO) 88 | # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: 89 | for model_name in ["bert-base-uncased"]: 90 | config = AutoConfig.from_pretrained(model_name) 91 | self.assertIsNotNone(config) 92 | self.assertIsInstance(config, BertConfig) 93 | 94 | model = TFAutoModelForQuestionAnswering.from_pretrained(model_name) 95 | self.assertIsNotNone(model) 96 | self.assertIsInstance(model, TFBertForQuestionAnswering) 97 | 98 | def test_from_pretrained_identifier(self): 99 | logging.basicConfig(level=logging.INFO) 100 | model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER) 101 | self.assertIsInstance(model, TFBertForMaskedLM) 102 | -------------------------------------------------------------------------------- /tests/test_optimization_tf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import is_tf_available 4 | 5 | from .utils import require_tf 6 | 7 | 8 | if is_tf_available(): 9 | import tensorflow as tf 10 | from tensorflow.python.eager import context 11 | from tensorflow.python.framework import ops 12 | from 
transformers import create_optimizer, GradientAccumulator 13 | 14 | 15 | @require_tf 16 | class OptimizationTFTest(unittest.TestCase): 17 | def assertListAlmostEqual(self, list1, list2, tol): 18 | self.assertEqual(len(list1), len(list2)) 19 | for a, b in zip(list1, list2): 20 | self.assertAlmostEqual(a, b, delta=tol) 21 | 22 | def testGradientAccumulator(self): 23 | accumulator = GradientAccumulator() 24 | accumulator([tf.constant([1.0, 2.0])]) 25 | accumulator([tf.constant([-2.0, 1.0])]) 26 | accumulator([tf.constant([-1.0, 2.0])]) 27 | with self.assertRaises(ValueError): 28 | accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])]) 29 | self.assertEqual(accumulator.step, 3) 30 | self.assertEqual(len(accumulator.gradients), 1) 31 | self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [-2.0, 5.0], tol=1e-2) 32 | accumulator.reset() 33 | self.assertEqual(accumulator.step, 0) 34 | self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [0.0, 0.0], tol=1e-2) 35 | 36 | def testGradientAccumulatorDistributionStrategy(self): 37 | context._context = None 38 | ops.enable_eager_execution_internal() 39 | physical_devices = tf.config.experimental.list_physical_devices("CPU") 40 | tf.config.experimental.set_virtual_device_configuration( 41 | physical_devices[0], 42 | [tf.config.experimental.VirtualDeviceConfiguration(), tf.config.experimental.VirtualDeviceConfiguration()], 43 | ) 44 | 45 | devices = tf.config.experimental.list_logical_devices(device_type="CPU") 46 | strategy = tf.distribute.MirroredStrategy(devices=[device.name for device in devices]) 47 | 48 | with strategy.scope(): 49 | accumulator = GradientAccumulator() 50 | variable = tf.Variable([4.0, 3.0]) 51 | optimizer = create_optimizer(5e-5, 10, 5) 52 | gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False) 53 | 54 | def accumulate_on_replica(gradient): 55 | accumulator([gradient]) 56 | 57 | def apply_on_replica(): 58 | optimizer.apply_gradients(list(zip(accumulator.gradients, [variable])), 1.0) 59 | 60 | @tf.function 61 | def accumulate(grad1, grad2): 62 | with strategy.scope(): 63 | gradient_placeholder.values[0].assign(grad1) 64 | gradient_placeholder.values[1].assign(grad2) 65 | strategy.experimental_run_v2(accumulate_on_replica, args=(gradient_placeholder,)) 66 | 67 | @tf.function 68 | def apply_grad(): 69 | with strategy.scope(): 70 | strategy.experimental_run_v2(apply_on_replica) 71 | 72 | accumulate([1.0, 2.0], [-1.0, 1.0]) 73 | accumulate([3.0, -1.0], [-1.0, -1.0]) 74 | accumulate([-2.0, 2.0], [3.0, -2.0]) 75 | self.assertEqual(accumulator.step, 3) 76 | self.assertListAlmostEqual(accumulator._gradients[0].values[0].value().numpy().tolist(), [2.0, 3.0], tol=1e-2) 77 | self.assertListAlmostEqual(accumulator._gradients[0].values[1].value().numpy().tolist(), [1.0, -2.0], tol=1e-2) 78 | apply_grad() 79 | self.assertListAlmostEqual(variable.value().numpy().tolist(), [4.0, 3.0], tol=1e-2) 80 | accumulator.reset() 81 | self.assertEqual(accumulator.step, 0) 82 | self.assertListAlmostEqual(accumulator._gradients[0].values[0].value().numpy().tolist(), [0.0, 0.0], tol=1e-2) 83 | self.assertListAlmostEqual(accumulator._gradients[0].values[1].value().numpy().tolist(), [0.0, 0.0], tol=1e-2) 84 |
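The tests above exercise the accumulate-then-apply pattern. As a minimal sketch, using only the calls the tests already rely on (TensorFlow 2 eager mode assumed; the constant gradients stand in for real tf.GradientTape output):

import tensorflow as tf
from transformers import create_optimizer, GradientAccumulator

accumulator = GradientAccumulator()
variable = tf.Variable([1.0, 2.0])
optimizer = create_optimizer(5e-5, 10, 5)  # init_lr, num_train_steps, num_warmup_steps

for _ in range(6):
    accumulator([tf.constant([0.1, -0.2])])  # keeps a running sum, not an average
    if accumulator.step == 3:  # apply once every 3 micro-batches
        optimizer.apply_gradients(list(zip(accumulator.gradients, [variable])))
        accumulator.reset()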
-------------------------------------------------------------------------------- /tests/test_tokenization_albert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 Hugging Face inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import unittest 19 | 20 | from transformers.tokenization_albert import AlbertTokenizer 21 | 22 | from .test_tokenization_common import TokenizerTesterMixin 23 | 24 | 25 | SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/spiece.model") 26 | 27 | 28 | class AlbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 29 | 30 | tokenizer_class = AlbertTokenizer 31 | 32 | def setUp(self): 33 | super(AlbertTokenizationTest, self).setUp() 34 | 35 | # We have a SentencePiece fixture for testing 36 | tokenizer = AlbertTokenizer(SAMPLE_VOCAB) 37 | tokenizer.save_pretrained(self.tmpdirname) 38 | 39 | def get_tokenizer(self, **kwargs): 40 | return AlbertTokenizer.from_pretrained(self.tmpdirname, **kwargs) 41 | 42 | def get_input_output_texts(self): 43 | input_text = "this is a test" 44 | output_text = "this is a test" 45 | return input_text, output_text 46 | 47 | def test_full_tokenizer(self): 48 | tokenizer = AlbertTokenizer(SAMPLE_VOCAB, keep_accents=True) 49 | 50 | tokens = tokenizer.tokenize("This is a test") 51 | self.assertListEqual(tokens, ["▁this", "▁is", "▁a", "▁test"]) 52 | 53 | self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [48, 25, 21, 1289]) 54 | 55 | tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.") 56 | self.assertListEqual( 57 | tokens, ["▁i", "▁was", "▁born", "▁in", "▁9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "é", "."] 58 | ) 59 | ids = tokenizer.convert_tokens_to_ids(tokens) 60 | self.assertListEqual(ids, [31, 23, 386, 19, 561, 3050, 15, 17, 48, 25, 8256, 18, 1, 9]) 61 | 62 | back_tokens = tokenizer.convert_ids_to_tokens(ids) 63 | self.assertListEqual( 64 | back_tokens, 65 | ["▁i", "▁was", "▁born", "▁in", "▁9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "<unk>", "."], 66 | ) 67 | 68 | def test_sequence_builders(self): 69 | tokenizer = AlbertTokenizer(SAMPLE_VOCAB) 70 | 71 | text = tokenizer.encode("sequence builders") 72 | text_2 = tokenizer.encode("multi-sequence build") 73 | 74 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 75 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 76 | 77 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 78 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 79 | tokenizer.sep_token_id 80 | ] 81 | -------------------------------------------------------------------------------- /tests/test_tokenization_auto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import logging 18 | import unittest 19 | 20 | from transformers import ( 21 | BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, 22 | GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, 23 | AutoTokenizer, 24 | BertTokenizer, 25 | GPT2Tokenizer, 26 | ) 27 | 28 | from .utils import SMALL_MODEL_IDENTIFIER, slow 29 | 30 | 31 | class AutoTokenizerTest(unittest.TestCase): 32 | @slow 33 | def test_tokenizer_from_pretrained(self): 34 | logging.basicConfig(level=logging.INFO) 35 | for model_name in list(BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]: 36 | tokenizer = AutoTokenizer.from_pretrained(model_name) 37 | self.assertIsNotNone(tokenizer) 38 | self.assertIsInstance(tokenizer, BertTokenizer) 39 | self.assertGreater(len(tokenizer), 0) 40 | 41 | for model_name in list(GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]: 42 | tokenizer = AutoTokenizer.from_pretrained(model_name) 43 | self.assertIsNotNone(tokenizer) 44 | self.assertIsInstance(tokenizer, GPT2Tokenizer) 45 | self.assertGreater(len(tokenizer), 0) 46 | 47 | def test_tokenizer_from_pretrained_identifier(self): 48 | logging.basicConfig(level=logging.INFO) 49 | tokenizer = AutoTokenizer.from_pretrained(SMALL_MODEL_IDENTIFIER) 50 | self.assertIsInstance(tokenizer, BertTokenizer) 51 | self.assertEqual(len(tokenizer), 12) 52 | -------------------------------------------------------------------------------- /tests/test_tokenization_ctrl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 Salesforce and HuggingFace Inc. team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import json 17 | import os 18 | import unittest 19 | 20 | from transformers.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer 21 | 22 | from .test_tokenization_common import TokenizerTesterMixin 23 | 24 | 25 | class CTRLTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 26 | 27 | tokenizer_class = CTRLTokenizer 28 | 29 | def setUp(self): 30 | super(CTRLTokenizationTest, self).setUp() 31 | 32 | # Adapted from Sennrich et al. 
2015 and https://github.com/rsennrich/subword-nmt 33 | vocab = ["adapt", "re@@", "a@@", "apt", "c@@", "t", "<unk>"] 34 | vocab_tokens = dict(zip(vocab, range(len(vocab)))) 35 | merges = ["#version: 0.2", "a p", "ap t", "r e", "a d", "ad apt", ""] 36 | self.special_tokens_map = {"unk_token": "<unk>"} 37 | 38 | self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) 39 | self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"]) 40 | with open(self.vocab_file, "w", encoding="utf-8") as fp: 41 | fp.write(json.dumps(vocab_tokens) + "\n") 42 | with open(self.merges_file, "w", encoding="utf-8") as fp: 43 | fp.write("\n".join(merges)) 44 | 45 | def get_tokenizer(self, **kwargs): 46 | kwargs.update(self.special_tokens_map) 47 | return CTRLTokenizer.from_pretrained(self.tmpdirname, **kwargs) 48 | 49 | def get_input_output_texts(self): 50 | input_text = "adapt react readapt apt" 51 | output_text = "adapt react readapt apt" 52 | return input_text, output_text 53 | 54 | def test_full_tokenizer(self): 55 | tokenizer = CTRLTokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map) 56 | text = "adapt react readapt apt" 57 | bpe_tokens = "adapt re@@ a@@ c@@ t re@@ adapt apt".split() 58 | tokens = tokenizer.tokenize(text) 59 | self.assertListEqual(tokens, bpe_tokens) 60 | 61 | input_tokens = tokens + [tokenizer.unk_token] 62 | 63 | input_bpe_tokens = [0, 1, 2, 4, 5, 1, 0, 3, 6] 64 | self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) 65 | -------------------------------------------------------------------------------- /tests/test_tokenization_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | 17 | from transformers.tokenization_distilbert import DistilBertTokenizer 18 | 19 | from .test_tokenization_bert import BertTokenizationTest 20 | from .utils import slow 21 | 22 | 23 | class DistilBertTokenizationTest(BertTokenizationTest): 24 | 25 | tokenizer_class = DistilBertTokenizer 26 | 27 | def get_tokenizer(self, **kwargs): 28 | return DistilBertTokenizer.from_pretrained(self.tmpdirname, **kwargs) 29 | 30 | @slow 31 | def test_sequence_builders(self): 32 | tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 33 | 34 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 35 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 36 | 37 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 38 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 39 | 40 | assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] 41 | assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + text_2 + [ 42 | tokenizer.sep_token_id 43 | ] 44 | -------------------------------------------------------------------------------- /tests/test_tokenization_openai.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import json 18 | import os 19 | import unittest 20 | 21 | from transformers.tokenization_openai import VOCAB_FILES_NAMES, OpenAIGPTTokenizer 22 | 23 | from .test_tokenization_common import TokenizerTesterMixin 24 | 25 | 26 | class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 27 | 28 | tokenizer_class = OpenAIGPTTokenizer 29 | 30 | def setUp(self): 31 | super(OpenAIGPTTokenizationTest, self).setUp() 32 | 33 | # Adapted from Sennrich et al. 
2015 and https://github.com/rsennrich/subword-nmt 34 | vocab = [ 35 | "l", 36 | "o", 37 | "w", 38 | "e", 39 | "r", 40 | "s", 41 | "t", 42 | "i", 43 | "d", 44 | "n", 45 | "w</w>", 46 | "r</w>", 47 | "t</w>", 48 | "lo", 49 | "low", 50 | "er</w>", 51 | "low</w>", 52 | "lowest</w>", 53 | "newer</w>", 54 | "wider</w>", 55 | "<unk>", 56 | ] 57 | vocab_tokens = dict(zip(vocab, range(len(vocab)))) 58 | merges = ["#version: 0.2", "l o", "lo w", "e r</w>", ""] 59 | 60 | self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) 61 | self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"]) 62 | with open(self.vocab_file, "w") as fp: 63 | fp.write(json.dumps(vocab_tokens)) 64 | with open(self.merges_file, "w") as fp: 65 | fp.write("\n".join(merges)) 66 | 67 | def get_tokenizer(self, **kwargs): 68 | return OpenAIGPTTokenizer.from_pretrained(self.tmpdirname, **kwargs) 69 | 70 | def get_input_output_texts(self): 71 | input_text = "lower newer" 72 | output_text = "lower newer" 73 | return input_text, output_text 74 | 75 | def test_full_tokenizer(self): 76 | tokenizer = OpenAIGPTTokenizer(self.vocab_file, self.merges_file) 77 | 78 | text = "lower" 79 | bpe_tokens = ["low", "er</w>"] 80 | tokens = tokenizer.tokenize(text) 81 | self.assertListEqual(tokens, bpe_tokens) 82 | 83 | input_tokens = tokens + ["<unk>"] 84 | input_bpe_tokens = [14, 15, 20] 85 | self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) 86 |
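The expected ids in the test above follow directly from the toy fixture: the merges turn "lower" into the pieces "low" and "er</w>" (the </w> marker closes a word), both of which are in the vocabulary, while anything unseen falls back to "<unk>". A self-contained check of that arithmetic in plain Python:

vocab = {"low": 14, "er</w>": 15, "<unk>": 20}  # the relevant rows of the fixture above
tokens = ["low", "er</w>"]
ids = [vocab.get(t, vocab["<unk>"]) for t in tokens + ["<unk>"]]
assert ids == [14, 15, 20]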
-------------------------------------------------------------------------------- /tests/test_tokenization_t5.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 Google T5 Authors and HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import unittest 19 | 20 | from transformers.tokenization_t5 import T5Tokenizer 21 | from transformers.tokenization_xlnet import SPIECE_UNDERLINE 22 | 23 | from .test_tokenization_common import TokenizerTesterMixin 24 | 25 | 26 | SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model") 27 | 28 | 29 | class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase): 30 | 31 | tokenizer_class = T5Tokenizer 32 | 33 | def setUp(self): 34 | super(T5TokenizationTest, self).setUp() 35 | 36 | # We have a SentencePiece fixture for testing 37 | tokenizer = T5Tokenizer(SAMPLE_VOCAB) 38 | tokenizer.save_pretrained(self.tmpdirname) 39 | 40 | def get_tokenizer(self, **kwargs): 41 | return T5Tokenizer.from_pretrained(self.tmpdirname, **kwargs) 42 | 43 | def get_input_output_texts(self): 44 | input_text = "This is a test" 45 | output_text = "This is a test" 46 | return input_text, output_text 47 | 48 | def test_full_tokenizer(self): 49 | tokenizer = T5Tokenizer(SAMPLE_VOCAB) 50 | 51 | tokens = tokenizer.tokenize("This is a test") 52 | self.assertListEqual(tokens, ["▁This", "▁is", "▁a", "▁t", "est"]) 53 | 54 | self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [285, 46, 10, 170, 382]) 55 | 56 | tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.") 57 | self.assertListEqual( 58 | tokens, 59 | [ 60 | SPIECE_UNDERLINE + "I", 61 | SPIECE_UNDERLINE + "was", 62 | SPIECE_UNDERLINE + "b", 63 | "or", 64 | "n", 65 | SPIECE_UNDERLINE + "in", 66 | SPIECE_UNDERLINE + "", 67 | "9", 68 | "2", 69 | "0", 70 | "0", 71 | "0", 72 | ",", 73 | SPIECE_UNDERLINE + "and", 74 | SPIECE_UNDERLINE + "this", 75 | SPIECE_UNDERLINE + "is", 76 | SPIECE_UNDERLINE + "f", 77 | "al", 78 | "s", 79 | "é", 80 | ".", 81 | ], 82 | ) 83 | ids = tokenizer.convert_tokens_to_ids(tokens) 84 | self.assertListEqual(ids, [8, 21, 84, 55, 24, 19, 7, 0, 602, 347, 347, 347, 3, 12, 66, 46, 72, 80, 6, 0, 4]) 85 | 86 | back_tokens = tokenizer.convert_ids_to_tokens(ids) 87 | self.assertListEqual( 88 | back_tokens, 89 | [ 90 | SPIECE_UNDERLINE + "I", 91 | SPIECE_UNDERLINE + "was", 92 | SPIECE_UNDERLINE + "b", 93 | "or", 94 | "n", 95 | SPIECE_UNDERLINE + "in", 96 | SPIECE_UNDERLINE + "", 97 | "<unk>", 98 | "2", 99 | "0", 100 | "0", 101 | "0", 102 | ",", 103 | SPIECE_UNDERLINE + "and", 104 | SPIECE_UNDERLINE + "this", 105 | SPIECE_UNDERLINE + "is", 106 | SPIECE_UNDERLINE + "f", 107 | "al", 108 | "s", 109 | "<unk>", 110 | ".", 111 | ], 112 | ) 113 |
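Note why tokens and back_tokens differ in the test above: the small test SentencePiece model has no pieces for "9" or "é", so both encode to id 0, and the reverse mapping can only return "<unk>". A short sketch against the same fixture (assuming, as the expected ids above imply, that the fixture model reserves id 0 for "<unk>"):

from transformers.tokenization_t5 import T5Tokenizer

tok = T5Tokenizer(SAMPLE_VOCAB)  # SAMPLE_VOCAB as defined in the test above
assert tok.convert_tokens_to_ids(["9", "é"]) == [0, 0]  # both map to the unknown id
assert tok.convert_ids_to_tokens([0]) == ["<unk>"]  # the encode/decode round trip is lossy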
-------------------------------------------------------------------------------- /tests/test_tokenization_transfo_xl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import unittest 19 | 20 | from transformers import is_torch_available 21 | 22 | from .test_tokenization_common import TokenizerTesterMixin 23 | from .utils import require_torch 24 | 25 | 26 | if is_torch_available(): 27 | from transformers.tokenization_transfo_xl import TransfoXLTokenizer, VOCAB_FILES_NAMES 28 | 29 | 30 | @require_torch 31 | class TransfoXLTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 32 | 33 | tokenizer_class = TransfoXLTokenizer if is_torch_available() else None 34 | 35 | def setUp(self): 36 | super(TransfoXLTokenizationTest, self).setUp() 37 | 38 | vocab_tokens = [ 39 | "<unk>", 40 | "[CLS]", 41 | "[SEP]", 42 | "want", 43 | "unwanted", 44 | "wa", 45 | "un", 46 | "running", 47 | ",", 48 | "low", 49 | "l", 50 | ] 51 | self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) 52 | with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: 53 | vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) 54 | 55 | def get_tokenizer(self, **kwargs): 56 | kwargs["lower_case"] = True 57 | return TransfoXLTokenizer.from_pretrained(self.tmpdirname, **kwargs) 58 | 59 | def get_input_output_texts(self): 60 | input_text = "<unk> UNwanted , running" 61 | output_text = "<unk> unwanted, running" 62 | return input_text, output_text 63 | 64 | def test_full_tokenizer(self): 65 | tokenizer = TransfoXLTokenizer(vocab_file=self.vocab_file, lower_case=True) 66 | 67 | tokens = tokenizer.tokenize("<unk> UNwanted , running") 68 | self.assertListEqual(tokens, ["<unk>", "unwanted", ",", "running"]) 69 | 70 | self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [0, 4, 8, 7]) 71 | 72 | def test_full_tokenizer_lower(self): 73 | tokenizer = TransfoXLTokenizer(lower_case=True) 74 | 75 | self.assertListEqual( 76 | tokenizer.tokenize(" \tHeLLo ! how \n Are yoU ? "), ["hello", "!", "how", "are", "you", "?"] 77 | ) 78 | 79 | def test_full_tokenizer_no_lower(self): 80 | tokenizer = TransfoXLTokenizer(lower_case=False) 81 | 82 | self.assertListEqual( 83 | tokenizer.tokenize(" \tHeLLo ! how \n Are yoU ? "), ["HeLLo", "!", "how", "Are", "yoU", "?"] 84 | ) 85 | -------------------------------------------------------------------------------- /tests/test_tokenization_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | 17 | import unittest 18 | 19 | from transformers import PreTrainedTokenizer 20 | from transformers.tokenization_gpt2 import GPT2Tokenizer 21 | 22 | from .utils import slow 23 | 24 | 25 | class TokenizerUtilsTest(unittest.TestCase): 26 | def check_tokenizer_from_pretrained(self, tokenizer_class): 27 | s3_models = list(tokenizer_class.max_model_input_sizes.keys()) 28 | for model_name in s3_models[:1]: 29 | tokenizer = tokenizer_class.from_pretrained(model_name) 30 | self.assertIsNotNone(tokenizer) 31 | self.assertIsInstance(tokenizer, tokenizer_class) 32 | self.assertIsInstance(tokenizer, PreTrainedTokenizer) 33 | 34 | for special_tok in tokenizer.all_special_tokens: 35 | self.assertIsInstance(special_tok, str) 36 | special_tok_id = tokenizer.convert_tokens_to_ids(special_tok) 37 | self.assertIsInstance(special_tok_id, int) 38 | 39 | @slow 40 | def test_pretrained_tokenizers(self): 41 | self.check_tokenizer_from_pretrained(GPT2Tokenizer) 42 | -------------------------------------------------------------------------------- /tests/test_tokenization_xlm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import json 18 | import os 19 | import unittest 20 | 21 | from transformers.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer 22 | 23 | from .test_tokenization_common import TokenizerTesterMixin 24 | from .utils import slow 25 | 26 | 27 | class XLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): 28 | 29 | tokenizer_class = XLMTokenizer 30 | 31 | def setUp(self): 32 | super(XLMTokenizationTest, self).setUp() 33 | 34 | # Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt 35 | vocab = [ 36 | "l", 37 | "o", 38 | "w", 39 | "e", 40 | "r", 41 | "s", 42 | "t", 43 | "i", 44 | "d", 45 | "n", 46 | "w</w>", 47 | "r</w>", 48 | "t</w>", 49 | "lo", 50 | "low", 51 | "er</w>", 52 | "low</w>", 53 | "lowest</w>", 54 | "newer</w>", 55 | "wider</w>", 56 | "<unk>", 57 | ] 58 | vocab_tokens = dict(zip(vocab, range(len(vocab)))) 59 | merges = ["l o 123", "lo w 1456", "e r</w> 1789", ""] 60 | 61 | self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) 62 | self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"]) 63 | with open(self.vocab_file, "w") as fp: 64 | fp.write(json.dumps(vocab_tokens)) 65 | with open(self.merges_file, "w") as fp: 66 | fp.write("\n".join(merges)) 67 | 68 | def get_tokenizer(self, **kwargs): 69 | return XLMTokenizer.from_pretrained(self.tmpdirname, **kwargs) 70 | 71 | def get_input_output_texts(self): 72 | input_text = "lower newer" 73 | output_text = "lower newer" 74 | return input_text, output_text 75 | 76 | def test_full_tokenizer(self): 77 | """ Adapted from Sennrich et al.
2015 and https://github.com/rsennrich/subword-nmt """ 78 | tokenizer = XLMTokenizer(self.vocab_file, self.merges_file) 79 | 80 | text = "lower" 81 | bpe_tokens = ["low", "er</w>"] 82 | tokens = tokenizer.tokenize(text) 83 | self.assertListEqual(tokens, bpe_tokens) 84 | 85 | input_tokens = tokens + ["<unk>"] 86 | input_bpe_tokens = [14, 15, 20] 87 | self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) 88 | 89 | @slow 90 | def test_sequence_builders(self): 91 | tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-en-2048") 92 | 93 | text = tokenizer.encode("sequence builders", add_special_tokens=False) 94 | text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) 95 | 96 | encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) 97 | encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) 98 | 99 | assert encoded_sentence == [1] + text + [1] 100 | assert encoded_pair == [1] + text + [1] + text_2 + [1] 101 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import unittest 4 | from distutils.util import strtobool 5 | 6 | from transformers.file_utils import _tf_available, _torch_available 7 | 8 | 9 | CACHE_DIR = os.path.join(tempfile.gettempdir(), "transformers_test") 10 | 11 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy" 12 | 13 | 14 | def parse_flag_from_env(key, default=False): 15 | try: 16 | value = os.environ[key] 17 | except KeyError: 18 | # KEY isn't set, default to `default`. 19 | _value = default 20 | else: 21 | # KEY is set, convert it to True or False. 22 | try: 23 | _value = strtobool(value) 24 | except ValueError: 25 | # More values are supported, but let's keep the message simple. 26 | raise ValueError("If set, {} must be yes or no.".format(key)) 27 | return _value 28 | 29 | 30 | _run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) 31 | _run_custom_tokenizers = parse_flag_from_env("RUN_CUSTOM_TOKENIZERS", default=False) 32 | 33 | 34 | def slow(test_case): 35 | """ 36 | Decorator marking a test as slow. 37 | 38 | Slow tests are skipped by default. Set the RUN_SLOW environment variable 39 | to a truthy value to run them. 40 | 41 | """ 42 | if not _run_slow_tests: 43 | test_case = unittest.skip("test is slow")(test_case) 44 | return test_case 45 | 46 | 47 | def custom_tokenizers(test_case): 48 | """ 49 | Decorator marking a test for a custom tokenizer. 50 | 51 | Custom tokenizers require additional dependencies, and are skipped 52 | by default. Set the RUN_CUSTOM_TOKENIZERS environment variable 53 | to a truthy value to run them. 54 | """ 55 | if not _run_custom_tokenizers: 56 | test_case = unittest.skip("test of custom tokenizers")(test_case) 57 | return test_case 58 | 59 | 60 | def require_torch(test_case): 61 | """ 62 | Decorator marking a test that requires PyTorch. 63 | 64 | These tests are skipped when PyTorch isn't installed. 65 | 66 | """ 67 | if not _torch_available: 68 | test_case = unittest.skip("test requires PyTorch")(test_case) 69 | return test_case 70 | 71 |
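# A hypothetical sketch of how the markers defined above compose in a test
# module: the class-level decorator gates on PyTorch, while setting RUN_SLOW=1
# additionally unlocks the slow test.
#
#     @require_torch
#     class ExampleBertIntegrationTest(unittest.TestCase):
#         @slow
#         def test_full_pretrained_checkpoint(self):
#             ...  # heavy download; runs only with RUN_SLOW=1 and torch installed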
72 | def require_tf(test_case): 73 | """ 74 | Decorator marking a test that requires TensorFlow. 75 | 76 | These tests are skipped when TensorFlow isn't installed. 77 | 78 | """ 79 | if not _tf_available: 80 | test_case = unittest.skip("test requires TensorFlow")(test_case) 81 | return test_case 82 | 83 | 84 | if _torch_available: 85 | # Set the USE_CUDA environment variable to select a GPU. 86 | torch_device = "cuda" if parse_flag_from_env("USE_CUDA") else "cpu" 87 | else: 88 | torch_device = None 89 | -------------------------------------------------------------------------------- /transformers-cli: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from argparse import ArgumentParser 3 | 4 | from transformers.commands.download import DownloadCommand 5 | from transformers.commands.run import RunCommand 6 | from transformers.commands.user import UserCommands 7 | from transformers.commands.convert import ConvertCommand 8 | from transformers.commands.serving import ServeCommand 9 | 10 | if __name__ == '__main__': 11 | parser = ArgumentParser('Transformers CLI tool', usage='transformers-cli <command> [<args>]') 12 | commands_parser = parser.add_subparsers(help='transformers-cli command helpers') 13 | 14 | # Register commands 15 | ConvertCommand.register_subcommand(commands_parser) 16 | DownloadCommand.register_subcommand(commands_parser) 17 | RunCommand.register_subcommand(commands_parser) 18 | ServeCommand.register_subcommand(commands_parser) 19 | UserCommands.register_subcommand(commands_parser) 20 | 21 | # Let's go 22 | args = parser.parse_args() 23 | 24 | if not hasattr(args, 'func'): 25 | parser.print_help() 26 | exit(1) 27 | 28 | # Run 29 | service = args.func(args) 30 | service.run() 31 | -------------------------------------------------------------------------------- /utils/link_tester.py: -------------------------------------------------------------------------------- 1 | """ Link tester. 2 | 3 | This little utility reads all the python files in the repository, 4 | scans for links pointing to S3 and tests the links one by one. Raises an error 5 | at the end of the scan if at least one link was reported broken. 6 | """ 7 | import os 8 | import re 9 | import sys 10 | 11 | import requests 12 | 13 | 14 | REGEXP_FIND_S3_LINKS = r"""([\"'])(https:\/\/s3)(.*)?\1""" 15 | 16 | 17 | def list_python_files_in_repository(): 18 | """ List all python files in the repository. 19 | 20 | This function assumes that the script is executed in the root folder. 21 | """ 22 | source_code_files = [] 23 | for path, subdirs, files in os.walk("."): 24 | if "templates" in path: 25 | continue 26 | for name in files: 27 | if ".py" in name and ".pyc" not in name: 28 | path_to_files = os.path.join(path, name) 29 | source_code_files.append(path_to_files) 30 | 31 | return source_code_files 32 | 33 | 34 | def find_all_links(file_paths): 35 | links = [] 36 | for path in file_paths: 37 | links += scan_code_for_links(path) 38 | 39 | return links 40 | 41 | 42 | def scan_code_for_links(source): 43 | """ Scans the file to find links using a regular expression. 44 | Returns a list of links. 45 | """ 46 | with open(source, "r") as content: 47 | content = content.read() 48 | raw_links = re.findall(REGEXP_FIND_S3_LINKS, content) 49 | links = [prefix + suffix for _, prefix, suffix in raw_links] 50 | 51 | return links 52 | 53 | 54 | def check_all_links(links): 55 | """ Check that the provided links are valid. 56 | 57 | Links are considered valid if a HEAD request to the server 58 | returns a 200 status code.
59 | """ 60 | broken_links = [] 61 | for link in links: 62 | head = requests.head(link) 63 | if head.status_code != 200: 64 | broken_links.append(link) 65 | 66 | return broken_links 67 | 68 | 69 | if __name__ == "__main__": 70 | file_paths = list_python_files_in_repository() 71 | links = find_all_links(file_paths) 72 | broken_links = check_all_links(links) 73 | print("Looking for broken links to pre-trained models/configs/tokenizers...") 74 | if broken_links: 75 | print("The following links did not respond:") 76 | for link in broken_links: 77 | print("- {}".format(link)) 78 | sys.exit(1) 79 | print("All links are ok.") 80 | -------------------------------------------------------------------------------- /valohai.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - step: 4 | name: Execute python examples/run_glue.py 5 | image: pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 6 | command: 7 | - python /valohai/repository/utils/download_glue_data.py --data_dir=/glue_data 8 | - pip install -e . 9 | - pip install -r examples/requirements.txt 10 | - python examples/run_glue.py --do_train --data_dir=/glue_data/{parameter-value:task_name} {parameters} 11 | parameters: 12 | - name: model_type 13 | pass-as: --model_type={v} 14 | type: string 15 | default: bert 16 | - name: model_name_or_path 17 | pass-as: --model_name_or_path={v} 18 | type: string 19 | default: bert-base-uncased 20 | - name: task_name 21 | pass-as: --task_name={v} 22 | type: string 23 | default: MRPC 24 | - name: max_seq_length 25 | pass-as: --max_seq_length={v} 26 | description: The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. 27 | type: integer 28 | default: 128 29 | - name: per_gpu_train_batch_size 30 | pass-as: --per_gpu_train_batch_size={v} 31 | description: Batch size per GPU/CPU for training. 32 | type: integer 33 | default: 8 34 | - name: per_gpu_eval_batch_size 35 | pass-as: --per_gpu_eval_batch_size={v} 36 | description: Batch size per GPU/CPU for evaluation. 37 | type: integer 38 | default: 8 39 | - name: gradient_accumulation_steps 40 | pass-as: --gradient_accumulation_steps={v} 41 | description: Number of updates steps to accumulate before performing a backward/update pass. 42 | type: integer 43 | default: 1 44 | - name: learning_rate 45 | pass-as: --learning_rate={v} 46 | description: The initial learning rate for Adam. 47 | type: float 48 | default: 0.00005 49 | - name: adam_epsilon 50 | pass-as: --adam_epsilon={v} 51 | description: Epsilon for Adam optimizer. 52 | type: float 53 | default: 0.00000001 54 | - name: max_grad_norm 55 | pass-as: --max_grad_norm={v} 56 | description: Max gradient norm. 57 | type: float 58 | default: 1.0 59 | - name: num_train_epochs 60 | pass-as: --num_train_epochs={v} 61 | description: Total number of training epochs to perform. 62 | type: integer 63 | default: 3 64 | - name: max_steps 65 | pass-as: --max_steps={v} 66 | description: If > 0, set total number of training steps to perform. Override num_train_epochs. 67 | type: integer 68 | default: -1 69 | - name: warmup_steps 70 | pass-as: --warmup_steps={v} 71 | description: Linear warmup over warmup_steps. 72 | type: integer 73 | default: -1 74 | - name: logging_steps 75 | pass-as: --logging_steps={v} 76 | description: Log every X updates steps. 77 | type: integer 78 | default: 25 79 | - name: save_steps 80 | pass-as: --save_steps={v} 81 | description: Save checkpoint every X updates steps. 
82 | type: integer 83 | default: -1 84 | - name: output_dir 85 | pass-as: --output_dir={v} 86 | type: string 87 | default: /valohai/outputs 88 | - name: evaluate_during_training 89 | description: Run evaluation during training at each logging step. 90 | type: flag 91 | default: true 92 | - name: do_lower_case 93 | description: Set this flag if you are using an uncased model. 94 | type: flag 95 | --------------------------------------------------------------------------------