├── transformers
│   ├── file.py
│   ├── benchmark
│   │   ├── __init__.py
│   │   ├── benchmark_args.py
│   │   ├── benchmark_args_tf.py
│   │   └── benchmark_args_utils.py
│   ├── .modeling_bert.py.swp
│   ├── .vscode
│   │   └── settings.json
│   ├── commands
│   │   ├── __init__.py
│   │   ├── transformers_cli.py
│   │   ├── download.py
│   │   ├── env.py
│   │   ├── run.py
│   │   └── train.py
│   ├── data
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── dcn_language_modeling.py
│   │   │   ├── language_modeling.py
│   │   │   └── glue.py
│   │   ├── processors
│   │   │   ├── __init__.py
│   │   │   └── xnli.py
│   │   ├── __init__.py
│   │   └── metrics
│   │       └── __init__.py
│   ├── another_try.py
│   ├── try.py
│   ├── configuration_marian.py
│   ├── convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
│   ├── filep.py
│   ├── trainer_utils.py
│   ├── configuration_camembert.py
│   ├── configuration_mmbt.py
│   ├── activations.py
│   ├── convert_mobilebert_original_tf_checkpoint_to_pytorch.py
│   ├── utils_encoder_decoder.py
│   ├── configuration_xlm_roberta.py
│   ├── convert_t5_original_tf_checkpoint_to_pytorch.py
│   ├── convert_bert_original_tf_checkpoint_to_pytorch.py
│   ├── convert_albert_original_tf_checkpoint_to_pytorch.py
│   ├── modeling_marian.py
│   ├── tokenization_longformer.py
│   ├── training_args_tf.py
│   ├── tokenization_mobilebert.py
│   ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
│   ├── tokenization_retribert.py
│   ├── convert_openai_original_tf_checkpoint_to_pytorch.py
│   ├── convert_electra_original_tf_checkpoint_to_pytorch.py
│   ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
│   ├── convert_longformer_original_pytorch_lightning_to_pytorch.py
│   ├── configuration_longformer.py
│   ├── configuration_roberta.py
│   ├── tokenization_electra.py
│   ├── tokenization_distilbert.py
│   ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
│   ├── convert_bert_pytorch_checkpoint_to_original_tf.py
│   ├── configuration_encoder_decoder.py
│   ├── configuration_t5.py
│   ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
│   ├── modeling_camembert.py
│   ├── configuration_retribert.py
│   ├── modeling_xlm_roberta.py
│   ├── configuration_ctrl.py
│   ├── tokenization_flaubert.py
│   ├── configuration_bart.py
│   ├── modeling_tf_camembert.py
│   ├── modeling_tf_xlm_roberta.py
│   ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
│   └── configuration_distilbert.py
├── requirements.txt
├── predict.sh
├── config.json
├── chinese_roberta_wwm_ext_pytorch
│   └── config.json
├── train.sh
├── README.md
├── .gitignore
└── vocab
    └── pinyin_vocab.txt
/transformers/file.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/transformers/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.5.0
2 | transformers==3.0.0
3 | 
--------------------------------------------------------------------------------
/transformers/.modeling_bert.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/destwang/DCN/HEAD/transformers/.modeling_bert.py.swp
--------------------------------------------------------------------------------
/transformers/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.formatting.provider": "yapf",
3 |     "C_Cpp.dimInactiveRegions": false
4 | }
--------------------------------------------------------------------------------
/predict.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -v 4 | set -e 5 | 6 | 7 | INPUT_FILE=data/sighan15/TestInput.txt 8 | OUTPUT_FILE=output.txt 9 | MODEL_DIR=dcn_models/ 10 | MAX_LENGTH=130 11 | BATCH_SIZE=4 12 | 13 | python predict_DCN.py \ 14 | --model $MODEL_DIR \ 15 | --input_file $INPUT_FILE \ 16 | --output_file $OUTPUT_FILE \ 17 | --batch_size $BATCH_SIZE \ 18 | --max_len $MAX_LENGTH 19 | -------------------------------------------------------------------------------- /transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from argparse import ArgumentParser 3 | 4 | 5 | class BaseTransformersCLICommand(ABC): 6 | @staticmethod 7 | @abstractmethod 8 | def register_subcommand(parser: ArgumentParser): 9 | raise NotImplementedError() 10 | 11 | @abstractmethod 12 | def run(self): 13 | raise NotImplementedError() 14 | -------------------------------------------------------------------------------- /transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 4 | 5 | from .glue import GlueDataset, GlueDataTrainingArguments 6 | from .language_modeling import LineByLineTextDataset, TextDataset 7 | from .dcn_language_modeling import PinyinShuffleLineByLineTextDataset 8 | -------------------------------------------------------------------------------- /transformers/another_try.py: -------------------------------------------------------------------------------- 1 | from transformers import TFBertModel, BertTokenizer, BertConfig 2 | import tensorflow as tf 3 | 4 | config = BertConfig.from_pretrained("bert-base-cased", output_hidden_states=True) 5 | model = TFBertModel.from_pretrained("bert-base-cased", config=config) 6 | 7 | tok = BertTokenizer.from_pretrained("bert-base-cased") 8 | text = tok.encode("Ain't this [MASK] best thing you've ever seen?") 9 | 10 | inputs = tf.constant(text) 11 | outputs = model.predict(inputs) 12 | 13 | print(outputs) -------------------------------------------------------------------------------- /transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 
4 | 5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 9 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "topk": 8, 3 | "pinyin_vocab_size": 404, 4 | "pinyin_mapping_path": "vocab/pinyin_mapping.txt", 5 | "attention_probs_dropout_prob": 0.1, 6 | "directionality": "bidi", 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_position_embeddings": 512, 13 | "num_attention_heads": 12, 14 | "num_hidden_layers": 12, 15 | "pooler_fc_size": 768, 16 | "pooler_num_attention_heads": 12, 17 | "pooler_num_fc_layers": 3, 18 | "pooler_size_per_head": 128, 19 | "pooler_type": "first_token_transform", 20 | "type_vocab_size": 2, 21 | "vocab_size": 21128 22 | } 23 | -------------------------------------------------------------------------------- /chinese_roberta_wwm_ext_pytorch/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "topk": 8, 3 | "pinyin_vocab_size": 404, 4 | "pinyin_mapping_path": "vocab/pinyin_mapping.txt", 5 | "attention_probs_dropout_prob": 0.1, 6 | "directionality": "bidi", 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_position_embeddings": 512, 13 | "num_attention_heads": 12, 14 | "num_hidden_layers": 12, 15 | "pooler_fc_size": 768, 16 | "pooler_num_attention_heads": 12, 17 | "pooler_num_fc_layers": 3, 18 | "pooler_size_per_head": 128, 19 | "pooler_type": "first_token_transform", 20 | "type_vocab_size": 2, 21 | "vocab_size": 21128 22 | } 23 | -------------------------------------------------------------------------------- /transformers/try.py: -------------------------------------------------------------------------------- 1 | from transformers import TFAlbertForMaskedLM, TFAlbertModel, TFAlbertForSequenceClassification, AlbertForMaskedLM 2 | import os 3 | 4 | checkpoint = "albert-base-v1" 5 | 6 | model = AlbertForMaskedLM.from_pretrained(checkpoint) 7 | 8 | if not os.path.exists("~/saved/" + checkpoint): 9 | os.makedirs("~/saved/" + checkpoint) 10 | 11 | 12 | model.save_pretrained("~/saved/" + checkpoint) 13 | model = TFAlbertForMaskedLM.from_pretrained('~/saved/' + checkpoint, from_pt=True) 14 | model.save_pretrained("~/saved/" + checkpoint) 15 | model = TFAlbertModel.from_pretrained('~/saved/' + checkpoint) 16 | model = TFAlbertForMaskedLM.from_pretrained('~/saved/' + checkpoint) 17 | model = TFAlbertForSequenceClassification.from_pretrained('~/saved/' + checkpoint) 18 | 19 | 20 | print("nice model") -------------------------------------------------------------------------------- /transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this 3 | # module, but to preserve other warnings. So, don't check this module at all. 
4 | 5 | from .metrics import is_sklearn_available 6 | from .processors import ( 7 | DataProcessor, 8 | InputExample, 9 | InputFeatures, 10 | SingleSentenceClassificationProcessor, 11 | SquadExample, 12 | SquadFeatures, 13 | SquadV1Processor, 14 | SquadV2Processor, 15 | glue_convert_examples_to_features, 16 | glue_output_modes, 17 | glue_processors, 18 | glue_tasks_num_labels, 19 | squad_convert_examples_to_features, 20 | xnli_output_modes, 21 | xnli_processors, 22 | xnli_tasks_num_labels, 23 | ) 24 | 25 | 26 | if is_sklearn_available(): 27 | from .metrics import glue_compute_metrics, xnli_compute_metrics 28 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -v 4 | set -e 5 | 6 | 7 | TRAIN_FILE=data/train.txt 8 | TEST_FILE=data/sighan15/test_format.txt 9 | BERT_MODEL=chinese_roberta_wwm_ext_pytorch/ 10 | OUTPUT_DIR=dcn_models/ 11 | SAVE_STEPS=8794 12 | SEED=1038 13 | LR=5e-5 14 | SAVE_TOTAL_LIMIT=5 15 | MAX_LENGTH=130 16 | BATCH_SIZE=32 17 | NUM_EPOCHS=10 18 | 19 | python train_DCN.py \ 20 | --output_dir $OUTPUT_DIR \ 21 | --learning_rate $LR \ 22 | --per_gpu_train_batch_size $BATCH_SIZE \ 23 | --model_type=bert \ 24 | --model_name_or_path=$BERT_MODEL \ 25 | --num_train_epochs $NUM_EPOCHS \ 26 | --save_steps $SAVE_STEPS \ 27 | --logging_steps $SAVE_STEPS \ 28 | --save_total_limit $SAVE_TOTAL_LIMIT \ 29 | --block_size $MAX_LENGTH \ 30 | --train_data_file=$TRAIN_FILE \ 31 | --eval_data_file=$TEST_FILE \ 32 | --do_train \ 33 | --do_eval \ 34 | --do_predict \ 35 | --evaluate_during_training \ 36 | --seed $SEED \ 37 | --mlm \ 38 | --mlm_probability 0.15 39 | -------------------------------------------------------------------------------- /transformers/configuration_marian.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The OPUS-NMT Team, Marian team, and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ Marian model configuration """
16 | 
17 | from .configuration_bart import BartConfig
18 | 
19 | 
20 | PRETRAINED_CONFIG_ARCHIVE_MAP = {
21 |     "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
22 | }
23 | 
24 | 
25 | class MarianConfig(BartConfig):
26 |     model_type = "marian"
27 | 
--------------------------------------------------------------------------------
/transformers/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | 
4 | import torch
5 | 
6 | from transformers.file_utils import WEIGHTS_NAME
7 | 
8 | 
9 | DIALOGPT_MODELS = ["small", "medium", "large"]
10 | 
11 | OLD_KEY = "lm_head.decoder.weight"
12 | NEW_KEY = "lm_head.weight"
13 | 
14 | 
15 | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
16 |     d = torch.load(checkpoint_path)
17 |     d[NEW_KEY] = d.pop(OLD_KEY)
18 |     os.makedirs(pytorch_dump_folder_path, exist_ok=True)
19 |     torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     parser = argparse.ArgumentParser()
24 |     parser.add_argument("--dialogpt_path", default=".", type=str)
25 |     args = parser.parse_args()
26 |     for MODEL in DIALOGPT_MODELS:
27 |         checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
28 |         pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
29 |         convert_dialogpt_checkpoint(
30 |             checkpoint_path, pytorch_dump_folder_path,
31 |         )
32 | 
--------------------------------------------------------------------------------
/transformers/filep.py:
--------------------------------------------------------------------------------
1 | from transformers import GPT2LMHeadModel, GPT2Tokenizer
2 | import torch
3 | 
4 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
5 | model = GPT2LMHeadModel.from_pretrained('gpt2')
6 | 
7 | generated = tokenizer.encode("The Manhattan bridge")
8 | context = torch.tensor([generated])
9 | past = None
10 | 
11 | for i in range(15):
12 |     output, past = model(context, past=past)
13 | 
14 |     distribution = output[0, :]
15 | 
16 |     # Get the top 10 values' indices and cast them to a list
17 |     top_values = distribution[-1].topk(10).indices.tolist()
18 | 
19 |     # Decode those into words
20 |     top_words = [tokenizer.decode([x]) for x in top_values]
21 | 
22 |     # select words (only arbitrarily select the first three)
23 |     words = top_words[0:3]
24 | 
25 |     # Cast them back to tokens which can be used as an added token
26 |     selected_tokens = [tokenizer.encode(word) for word in words]
27 | 
28 |     # Greedily keep the most likely token and feed it back as the next context
29 |     argmax_token = distribution[-1].argmax()
30 |     generated += [argmax_token.tolist()]
31 |     context = argmax_token.unsqueeze(0)
32 | 
33 |     print(tokenizer.decode([argmax_token.tolist()]))
34 | 
35 | sequence = tokenizer.decode(generated)
36 | 
37 | print(sequence)
--------------------------------------------------------------------------------
/transformers/trainer_utils.py:
--------------------------------------------------------------------------------
1 | import os 2 | from typing import Dict, NamedTuple, Optional 3 | 4 | import numpy as np 5 | 6 | 7 | try: 8 | import wandb 9 | 10 | wandb.ensure_configured() 11 | if wandb.api.api_key is None: 12 | _has_wandb = False 13 | wandb.termwarn("W&B installed but not logged in.
Run `wandb login` or set the WANDB_API_KEY env variable.") 14 | else: 15 | _has_wandb = False if os.getenv("WANDB_DISABLED") else True 16 | except (ImportError, AttributeError): 17 | _has_wandb = False 18 | 19 | 20 | def is_wandb_available(): 21 | return _has_wandb 22 | 23 | 24 | class EvalPrediction(NamedTuple): 25 | """ 26 | Evaluation output (always contains labels), to be used 27 | to compute metrics. 28 | """ 29 | 30 | #predictions: Union[list, np.ndarray] 31 | #label_ids: Union[list, np.ndarray] 32 | predictions: np.ndarray 33 | label_ids: np.ndarray 34 | pass 35 | 36 | 37 | class PredictionOutput(NamedTuple): 38 | predictions: np.ndarray 39 | label_ids: Optional[np.ndarray] 40 | metrics: Optional[Dict[str, float]] 41 | 42 | 43 | class TrainOutput(NamedTuple): 44 | global_step: int 45 | training_loss: float 46 | 47 | 48 | PREFIX_CHECKPOINT_DIR = "checkpoint" 49 | -------------------------------------------------------------------------------- /transformers/commands/transformers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from argparse import ArgumentParser 3 | 4 | from transformers.commands.convert import ConvertCommand 5 | from transformers.commands.download import DownloadCommand 6 | from transformers.commands.env import EnvironmentCommand 7 | from transformers.commands.run import RunCommand 8 | from transformers.commands.serving import ServeCommand 9 | from transformers.commands.user import UserCommands 10 | 11 | 12 | def main(): 13 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli []") 14 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers") 15 | 16 | # Register commands 17 | ConvertCommand.register_subcommand(commands_parser) 18 | DownloadCommand.register_subcommand(commands_parser) 19 | EnvironmentCommand.register_subcommand(commands_parser) 20 | RunCommand.register_subcommand(commands_parser) 21 | ServeCommand.register_subcommand(commands_parser) 22 | UserCommands.register_subcommand(commands_parser) 23 | 24 | # Let's go 25 | args = parser.parse_args() 26 | 27 | if not hasattr(args, "func"): 28 | parser.print_help() 29 | exit(1) 30 | 31 | # Run 32 | service = args.func(args) 33 | service.run() 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /transformers/commands/download.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from transformers.commands import BaseTransformersCLICommand 4 | 5 | 6 | def download_command_factory(args): 7 | return DownloadCommand(args.model, args.cache_dir, args.force) 8 | 9 | 10 | class DownloadCommand(BaseTransformersCLICommand): 11 | @staticmethod 12 | def register_subcommand(parser: ArgumentParser): 13 | download_parser = parser.add_parser("download") 14 | download_parser.add_argument( 15 | "--cache-dir", type=str, default=None, help="Path to location to store the models" 16 | ) 17 | download_parser.add_argument( 18 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir" 19 | ) 20 | download_parser.add_argument("model", type=str, help="Name of the model to download") 21 | download_parser.set_defaults(func=download_command_factory) 22 | 23 | def __init__(self, model: str, cache: str, force: bool): 24 | self._model = model 25 | self._cache = cache 26 | self._force = force 27 | 28 | def run(self): 29 | from 
transformers import AutoModel, AutoTokenizer 30 | 31 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 32 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 33 | -------------------------------------------------------------------------------- /transformers/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", 28 | "umberto-commoncrawl-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-commoncrawl-cased-v1/config.json", 29 | "umberto-wikipedia-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-wikipedia-uncased-v1/config.json", 30 | } 31 | 32 | 33 | class CamembertConfig(RobertaConfig): 34 | """ 35 | This class overrides :class:`~transformers.RobertaConfig`. Please check the 36 | superclass for the appropriate documentation alongside usage examples. 37 | """ 38 | 39 | model_type = "camembert" 40 | -------------------------------------------------------------------------------- /transformers/configuration_mmbt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # Copyright (c) HuggingFace Inc. team. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ MMBT configuration """ 17 | 18 | 19 | import logging 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MMBTConfig(object): 26 | """Configuration class to store the configuration of a `MMBT Model`. 27 | 28 | Args: 29 | config (:obj:`~transformers.PreTrainedConfig`): 30 | Config of the underlying Transformer models. Its values are 31 | copied over to use a single config. 
32 | num_labels (:obj:`int` or :obj:`None`, optional, defaults to `None`): 33 | Size of final Linear layer for classification. 34 | modal_hidden_size (:obj:`int`, optional, defautls to 2048): 35 | Embedding dimension of the non-text modality encoder. 36 | """ 37 | 38 | def __init__(self, config, num_labels=None, modal_hidden_size=2048): 39 | self.__dict__ = config.__dict__ 40 | self.modal_hidden_size = modal_hidden_size 41 | if num_labels: 42 | self.num_labels = num_labels 43 | -------------------------------------------------------------------------------- /transformers/activations.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def swish(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | def _gelu_python(x): 16 | """ Original Implementation of the gelu activation function in Google Bert repo when initially created. 17 | For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 18 | 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 19 | This is now written in C in torch.nn.functional 20 | Also see https://arxiv.org/abs/1606.08415 21 | """ 22 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 23 | 24 | 25 | def gelu_new(x): 26 | """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). 27 | Also see https://arxiv.org/abs/1606.08415 28 | """ 29 | return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) 30 | 31 | 32 | if torch.__version__ < "1.4.0": 33 | gelu = _gelu_python 34 | else: 35 | gelu = F.gelu 36 | 37 | 38 | def gelu_fast(x): 39 | return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x))) 40 | 41 | 42 | ACT2FN = { 43 | "relu": F.relu, 44 | "swish": swish, 45 | "gelu": gelu, 46 | "tanh": torch.tanh, 47 | "gelu_new": gelu_new, 48 | "gelu_fast": gelu_fast, 49 | } 50 | 51 | 52 | def get_activation(activation_string): 53 | if activation_string in ACT2FN: 54 | return ACT2FN[activation_string] 55 | else: 56 | raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) 57 | -------------------------------------------------------------------------------- /transformers/convert_mobilebert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | import torch 5 | 6 | from transformers import MobileBertConfig, MobileBertForPreTraining, load_tf_weights_in_mobilebert 7 | 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | 12 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, mobilebert_config_file, pytorch_dump_path): 13 | # Initialise PyTorch model 14 | config = MobileBertConfig.from_json_file(mobilebert_config_file) 15 | print("Building PyTorch model from configuration: {}".format(str(config))) 16 | model = MobileBertForPreTraining(config) 17 | # Load weights from tf checkpoint 18 | model = load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path) 19 | # Save pytorch-model 20 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 21 | torch.save(model.state_dict(), pytorch_dump_path) 22 | 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser() 26 | # Required parameters 27 | parser.add_argument( 28 | 
"--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 29 | ) 30 | parser.add_argument( 31 | "--mobilebert_config_file", 32 | default=None, 33 | type=str, 34 | required=True, 35 | help="The config json file corresponding to the pre-trained MobileBERT model. \n" 36 | "This specifies the model architecture.", 37 | ) 38 | parser.add_argument( 39 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 40 | ) 41 | args = parser.parse_args() 42 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.mobilebert_config_file, args.pytorch_dump_path) 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dynamic Connected Networks for Chinese Spelling Check 2 | 3 | This repository provides training code of DCN models for Chinese Spelling Check (CSC). 4 | 5 | The paper has been accepted in Findings of ACL 2021. 6 | 7 | 8 | ## Installation 9 | Our code is based on [transformers 3.0](https://github.com/huggingface/transformers/tree/v3.0.0). 10 | 11 | The following command installs all necessary packages: 12 | ``` 13 | pip install -r requirements.txt 14 | ``` 15 | We test our code using Python 3.6. 16 | 17 | 18 | ## Datasets 19 | The preprocessed training dataset can be downloaded from [here(password:hfiw)](https://pan.baidu.com/s/161ae-g2A7M0KnpJI79hLWg). 20 | 21 | 22 | ## Train Model 23 | To train the DCN model, download the [RoBERTa-wwm-ext](https://github.com/ymcui/Chinese-BERT-wwm) and copy the model to *chinese_roberta_wwm_ext_pytorch*, then run: 24 | ``` 25 | sh train.sh 26 | ``` 27 | 28 | ## Experimental Result 29 | The sentence-level experimental results on SIGHAN15 for the default config are as follows: 30 | 31 | | model | d-p | d-r | d-f | c-p | c-r | c-f | 32 | | - | - | - | - | - | - | - | 33 | | DCN | 76.84 | 79.64 | 78.21 | 74.74 | 77.45 | 76.07 | 34 | 35 | 36 | ## Citation 37 | ``` 38 | @inproceedings{wang-etal-2021-dynamic, 39 | title = "Dynamic Connected Networks for {C}hinese Spelling Check", 40 | author = "Wang, Baoxin and 41 | Che, Wanxiang and 42 | Wu, Dayong and 43 | Wang, Shijin and 44 | Hu, Guoping and 45 | Liu, Ting", 46 | booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021", 47 | month = aug, 48 | year = "2021", 49 | address = "Online", 50 | publisher = "Association for Computational Linguistics", 51 | url = "https://aclanthology.org/2021.findings-acl.216", 52 | doi = "10.18653/v1/2021.findings-acl.216", 53 | pages = "2437--2446", 54 | } 55 | ``` 56 | 57 | ## Related Work 58 | * [CTC 2021](https://github.com/destwang/CTC2021) 59 | * [CTC Resources](https://github.com/destwang/CTCResources) 60 | -------------------------------------------------------------------------------- /transformers/utils_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Classes to support Encoder-Decoder architectures """ 16 | 17 | 18 | def prepare_encoder_decoder_model_kwargs(**kwargs): 19 | """ Prepare the encoder and decoder's keyword arguments. 20 | 21 | Keyword arguments come in 3 flavors: 22 | - encoder-specific (prefixed by `encoder_`) 23 | - decoder-specific (prefixed by `decoder_`) 24 | - those that apply to the model as whole. 25 | 26 | We let the specific kwargs override the common ones in case of 27 | conflict. 28 | """ 29 | 30 | kwargs_common = { 31 | argument: value 32 | for argument, value in kwargs.items() 33 | if not argument.startswith("encoder_") and not argument.startswith("decoder_") 34 | } 35 | if "input_ids" in kwargs_common: 36 | kwargs["encoder_input_ids"] = kwargs_common.pop("input_ids") 37 | 38 | decoder_kwargs = kwargs_common.copy() 39 | encoder_kwargs = kwargs_common.copy() 40 | encoder_kwargs.update( 41 | {argument[len("encoder_") :]: value for argument, value in kwargs.items() if argument.startswith("encoder_")} 42 | ) 43 | decoder_kwargs.update( 44 | {argument[len("decoder_") :]: value for argument, value in kwargs.items() if argument.startswith("decoder_")} 45 | ) 46 | decoder_kwargs["encoder_attention_mask"] = encoder_kwargs.get("attention_mask", None) 47 | return encoder_kwargs, decoder_kwargs 48 | -------------------------------------------------------------------------------- /transformers/configuration_xlm_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """ XLM-RoBERTa configuration """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_roberta import RobertaConfig 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-config.json", 28 | "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-config.json", 29 | "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-config.json", 30 | "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-config.json", 31 | "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-config.json", 32 | "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-config.json", 33 | } 34 | 35 | 36 | class XLMRobertaConfig(RobertaConfig): 37 | """ 38 | This class overrides :class:`~transformers.RobertaConfig`. Please check the 39 | superclass for the appropriate documentation alongside usage examples. 40 | """ 41 | 42 | model_type = "xlm-roberta" 43 | -------------------------------------------------------------------------------- /transformers/commands/env.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from argparse import ArgumentParser 3 | 4 | from transformers import __version__ as version 5 | from transformers import is_tf_available, is_torch_available 6 | from transformers.commands import BaseTransformersCLICommand 7 | 8 | 9 | def info_command_factory(_): 10 | return EnvironmentCommand() 11 | 12 | 13 | class EnvironmentCommand(BaseTransformersCLICommand): 14 | @staticmethod 15 | def register_subcommand(parser: ArgumentParser): 16 | download_parser = parser.add_parser("env") 17 | download_parser.set_defaults(func=info_command_factory) 18 | 19 | def run(self): 20 | pt_version = "not installed" 21 | pt_cuda_available = "NA" 22 | if is_torch_available(): 23 | import torch 24 | 25 | pt_version = torch.__version__ 26 | pt_cuda_available = torch.cuda.is_available() 27 | 28 | tf_version = "not installed" 29 | tf_cuda_available = "NA" 30 | if is_tf_available(): 31 | import tensorflow as tf 32 | 33 | tf_version = tf.__version__ 34 | try: 35 | # deprecated in v2.1 36 | tf_cuda_available = tf.test.is_gpu_available() 37 | except AttributeError: 38 | # returns list of devices, convert to bool 39 | tf_cuda_available = bool(tf.config.list_physical_devices("GPU")) 40 | 41 | info = { 42 | "`transformers` version": version, 43 | "Platform": platform.platform(), 44 | "Python version": platform.python_version(), 45 | "PyTorch version (GPU?)": "{} ({})".format(pt_version, pt_cuda_available), 46 | "Tensorflow version (GPU?)": "{} ({})".format(tf_version, tf_cuda_available), 47 | "Using GPU in script?": "", 48 | "Using distributed or parallel set-up in script?": "", 49 | } 50 | 51 | print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") 52 | print(self.format_dict(info)) 53 | 54 | return info 55 | 56 | @staticmethod 57 | def format_dict(d): 58 | return "\n".join(["- {}: {}".format(prop, val) for prop, val in d.items()]) + "\n" 59 | 
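The `env` command above is wired into the CLI through the same `register_subcommand` / `set_defaults(func=...)` pattern used by every command under `transformers/commands/` (see `transformers_cli.py` earlier in this dump). A minimal sketch of driving it directly, for illustration only (not part of the repository, and assuming transformers 3.0 from requirements.txt is installed):

from argparse import ArgumentParser
from transformers.commands.env import EnvironmentCommand

# Register only the "env" subcommand, mirroring what transformers_cli.py does for all commands.
parser = ArgumentParser("transformers-cli")
subcommands = parser.add_subparsers()
EnvironmentCommand.register_subcommand(subcommands)

# Parsing ["env"] selects the factory stored via set_defaults(func=...);
# calling it builds an EnvironmentCommand, and run() prints the environment report.
args = parser.parse_args(["env"])
args.func(args).run()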
-------------------------------------------------------------------------------- /transformers/convert_t5_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The T5 authors and HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert T5 checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import T5Config, T5Model, load_tf_weights_in_t5 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = T5Config.from_json_file(config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = T5Model(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_t5(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained T5 model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /transformers/convert_bert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert BERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = BertConfig.from_json_file(bert_config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = BertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_bert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--bert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained BERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.bert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /transformers/convert_albert_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert ALBERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import AlbertConfig, AlbertForPreTraining, load_tf_weights_in_albert 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path): 30 | # Initialise PyTorch model 31 | config = AlbertConfig.from_json_file(albert_config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | model = AlbertForPreTraining(config) 34 | 35 | # Load weights from tf checkpoint 36 | load_tf_weights_in_albert(model, config, tf_checkpoint_path) 37 | 38 | # Save pytorch-model 39 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 40 | torch.save(model.state_dict(), pytorch_dump_path) 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | # Required parameters 46 | parser.add_argument( 47 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 48 | ) 49 | parser.add_argument( 50 | "--albert_config_file", 51 | default=None, 52 | type=str, 53 | required=True, 54 | help="The config json file corresponding to the pre-trained ALBERT model. \n" 55 | "This specifies the model architecture.", 56 | ) 57 | parser.add_argument( 58 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 59 | ) 60 | args = parser.parse_args() 61 | convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.albert_config_file, args.pytorch_dump_path) 62 | -------------------------------------------------------------------------------- /transformers/modeling_marian.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Marian Team Authors and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """PyTorch MarianMTModel model, ported from the Marian C++ repo.""" 16 | 17 | 18 | from transformers.modeling_bart import BartForConditionalGeneration 19 | 20 | 21 | MARIAN_PRETRAINED_MODEL_ARCHIVE_LIST = [ 22 | # See all Marian models at https://huggingface.co/models?search=Helsinki-NLP 23 | ] 24 | 25 | 26 | class MarianMTModel(BartForConditionalGeneration): 27 | r""" 28 | Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints. 29 | Model API is identical to BartForConditionalGeneration. 30 | Available models are listed at `Model List `__ 31 | 32 | Examples:: 33 | 34 | >>> from transformers import MarianTokenizer, MarianMTModel 35 | >>> from typing import List 36 | >>> src = 'fr' # source language 37 | >>> trg = 'en' # target language 38 | >>> sample_text = "où est l'arrêt de bus ?" 
39 | >>> mname = f'Helsinki-NLP/opus-mt-{src}-{trg}' 40 | 41 | >>> model = MarianMTModel.from_pretrained(mname) 42 | >>> tok = MarianTokenizer.from_pretrained(mname) 43 | >>> batch = tok.prepare_translation_batch(src_texts=[sample_text]) # don't need tgt_text for inference 44 | >>> gen = model.generate(**batch) # for forward pass: model(**batch) 45 | >>> words: List[str] = tok.batch_decode(gen, skip_special_tokens=True) # returns "Where is the the bus stop ?" 46 | 47 | """ 48 | 49 | def adjust_logits_during_generation(self, logits, cur_len, max_length): 50 | logits[:, self.config.pad_token_id] = float("-inf") 51 | if cur_len == max_length - 1 and self.config.eos_token_id is not None: 52 | self._force_token_ids_generation(logits, self.config.eos_token_id) 53 | return logits 54 | -------------------------------------------------------------------------------- /transformers/tokenization_longformer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Allen Institute for AI team and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import logging 17 | 18 | from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | # vocab and merges same as roberta 25 | vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json" 26 | merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt" 27 | _all_longformer_models = [ 28 | "allenai/longformer-base-4096", 29 | "allenai/longformer-large-4096", 30 | "allenai/longformer-large-4096-finetuned-triviaqa", 31 | "allenai/longformer-base-4096-extra.pos.embd.only", 32 | "allenai/longformer-large-4096-extra.pos.embd.only", 33 | ] 34 | 35 | 36 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 37 | "allenai/longformer-base-4096": 4096, 38 | "allenai/longformer-large-4096": 4096, 39 | "allenai/longformer-large-4096-finetuned-triviaqa": 4096, 40 | "allenai/longformer-base-4096-extra.pos.embd.only": 4096, 41 | "allenai/longformer-large-4096-extra.pos.embd.only": 4096, 42 | } 43 | 44 | 45 | class LongformerTokenizer(RobertaTokenizer): 46 | # merges and vocab same as Roberta 47 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 48 | pretrained_vocab_files_map = { 49 | "vocab_file": {m: vocab_url for m in _all_longformer_models}, 50 | "merges_file": {m: merges_url for m in _all_longformer_models}, 51 | } 52 | 53 | 54 | class LongformerTokenizerFast(RobertaTokenizerFast): 55 | # merges and vocab same as Roberta 56 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 57 | pretrained_vocab_files_map = { 58 | "vocab_file": {m: vocab_url for m in _all_longformer_models}, 59 | "merges_file": {m: merges_url for m in _all_longformer_models}, 60 | } 61 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # model & dataset 2 | *.bin 3 | train.txt 4 | dcn_models/ 5 | runs/ 6 | 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | -------------------------------------------------------------------------------- /transformers/training_args_tf.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import dataclass, field 3 | from typing import Tuple 4 | 5 | from .file_utils import cached_property, is_tf_available, tf_required 6 | from .training_args import TrainingArguments 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | if is_tf_available(): 12 | import tensorflow as tf 13 | 14 | 15 | @dataclass 16 | class TFTrainingArguments(TrainingArguments): 17 | tpu_name: str = field( 18 | default=None, metadata={"help": "Name of TPU"}, 19 | ) 20 | eval_steps: int = field(default=1000, metadata={"help": "Run an evaluation every X steps."}) 21 | debug: bool = field( 22 | default=False, metadata={"help": "Activate the trace to record computation graphs and profiling information"} 23 | ) 24 | 25 | @cached_property 26 | @tf_required 27 | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]: 28 | logger.info("Tensorflow: setting up strategy") 29 | gpus = tf.config.list_physical_devices("GPU") 30 | 31 | if self.no_cuda: 32 | strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0") 33 | else: 34 | try: 35 | if self.tpu_name: 36 | tpu = tf.distribute.cluster_resolver.TPUClusterResolver(self.tpu_name) 37 | else: 38 | tpu = tf.distribute.cluster_resolver.TPUClusterResolver() 39 | except ValueError: 40 | tpu = None 41 | 42 | if tpu: 43 | tf.config.experimental_connect_to_cluster(tpu) 44 | tf.tpu.experimental.initialize_tpu_system(tpu) 45 | 46 | strategy = tf.distribute.experimental.TPUStrategy(tpu) 47 | elif len(gpus) == 0: 48 | strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0") 49 | elif len(gpus) == 1: 50 | strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0") 51 | elif len(gpus) > 1: 52 | # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0` 53 | strategy = tf.distribute.MirroredStrategy() 54 | else: 55 | raise ValueError("Cannot find the proper strategy please check your environment properties.") 56 | 57 | return strategy 58 | 59 | @property 60 | @tf_required 61 | def strategy(self) -> "tf.distribute.Strategy": 62 | return self._setup_strategy 63 | 64 | @property 65 | @tf_required 66 | def n_gpu(self) -> int: 67 | return self._setup_strategy.num_replicas_in_sync 68 | -------------------------------------------------------------------------------- /transformers/tokenization_mobilebert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tokenization classes for MobileBERT.""" 15 | 16 | 17 | import logging 18 | 19 | from .tokenization_bert import BertTokenizer, BertTokenizerFast 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 25 | 26 | PRETRAINED_VOCAB_FILES_MAP = { 27 | "vocab_file": { 28 | "mobilebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/google/mobilebert-uncased/vocab.txt" 29 | } 30 | } 31 | 32 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {} 33 | 34 | 35 | PRETRAINED_INIT_CONFIGURATION = {} 36 | 37 | 38 | class MobileBertTokenizer(BertTokenizer): 39 | r""" 40 | Constructs a MobileBertTokenizer. 41 | 42 | :class:`~transformers.MobileBertTokenizer is identical to :class:`~transformers.BertTokenizer` and runs end-to-end 43 | tokenization: punctuation splitting + wordpiece. 44 | 45 | Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning 46 | parameters. 47 | """ 48 | 49 | vocab_files_names = VOCAB_FILES_NAMES 50 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 51 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 52 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 53 | 54 | 55 | class MobileBertTokenizerFast(BertTokenizerFast): 56 | r""" 57 | Constructs a "Fast" MobileBertTokenizer (backed by HuggingFace's `tokenizers` library). 58 | 59 | :class:`~transformers.MobileBertTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end 60 | tokenization: punctuation splitting + wordpiece. 61 | 62 | Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning 63 | parameters. 64 | """ 65 | 66 | vocab_files_names = VOCAB_FILES_NAMES 67 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 68 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 69 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 70 | -------------------------------------------------------------------------------- /transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert OpenAI GPT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import CONFIG_NAME, WEIGHTS_NAME, GPT2Config, GPT2Model, load_tf_weights_in_gpt2 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, pytorch_dump_folder_path): 30 | # Construct model 31 | if gpt2_config_file == "": 32 | config = GPT2Config() 33 | else: 34 | config = GPT2Config.from_json_file(gpt2_config_file) 35 | model = GPT2Model(config) 36 | 37 | # Load weights from numpy 38 | load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path) 39 | 40 | # Save pytorch-model 41 | pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME 42 | pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME 43 | print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) 44 | torch.save(model.state_dict(), pytorch_weights_dump_path) 45 | print("Save configuration file to {}".format(pytorch_config_dump_path)) 46 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 47 | f.write(config.to_json_string()) 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | # Required parameters 53 | parser.add_argument( 54 | "--gpt2_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 55 | ) 56 | parser.add_argument( 57 | "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 58 | ) 59 | parser.add_argument( 60 | "--gpt2_config_file", 61 | default="", 62 | type=str, 63 | help="An optional config json file corresponding to the pre-trained OpenAI model. \n" 64 | "This specifies the model architecture.", 65 | ) 66 | args = parser.parse_args() 67 | convert_gpt2_checkpoint_to_pytorch(args.gpt2_checkpoint_path, args.gpt2_config_file, args.pytorch_dump_folder_path) 68 | -------------------------------------------------------------------------------- /transformers/tokenization_retribert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for RetriBERT.""" 16 | 17 | 18 | import logging 19 | 20 | from .tokenization_bert import BertTokenizer, BertTokenizerFast 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 26 | 27 | PRETRAINED_VOCAB_FILES_MAP = { 28 | "vocab_file": { 29 | "yjernite/retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", 30 | } 31 | } 32 | 33 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 34 | "yjernite/retribert-base-uncased": 512, 35 | } 36 | 37 | 38 | PRETRAINED_INIT_CONFIGURATION = { 39 | "yjernite/retribert-base-uncased": {"do_lower_case": True}, 40 | } 41 | 42 | 43 | class RetriBertTokenizer(BertTokenizer): 44 | r""" 45 | Constructs a retribert. 46 | 47 | :class:`~transformers.retribert is identical to :class:`~transformers.BertTokenizer` and runs end-to-end 48 | tokenization: punctuation splitting + wordpiece. 49 | 50 | Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning 51 | parameters. 52 | """ 53 | 54 | vocab_files_names = VOCAB_FILES_NAMES 55 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 56 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 57 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 58 | model_input_names = ["attention_mask"] 59 | 60 | 61 | class RetriBertTokenizerFast(BertTokenizerFast): 62 | r""" 63 | Constructs a "Fast" RetriBertTokenizerFast (backed by HuggingFace's `tokenizers` library). 64 | 65 | :class:`~transformers.RetriBertTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end 66 | tokenization: punctuation splitting + wordpiece. 67 | 68 | Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning 69 | parameters. 70 | """ 71 | 72 | vocab_files_names = VOCAB_FILES_NAMES 73 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 74 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 75 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 76 | model_input_names = ["attention_mask"] 77 | -------------------------------------------------------------------------------- /transformers/convert_openai_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert OpenAI GPT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import CONFIG_NAME, WEIGHTS_NAME, OpenAIGPTConfig, OpenAIGPTModel, load_tf_weights_in_openai_gpt 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_config_file, pytorch_dump_folder_path): 30 | # Construct model 31 | if openai_config_file == "": 32 | config = OpenAIGPTConfig() 33 | else: 34 | config = OpenAIGPTConfig.from_json_file(openai_config_file) 35 | model = OpenAIGPTModel(config) 36 | 37 | # Load weights from numpy 38 | load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path) 39 | 40 | # Save pytorch-model 41 | pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME 42 | pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME 43 | print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) 44 | torch.save(model.state_dict(), pytorch_weights_dump_path) 45 | print("Save configuration file to {}".format(pytorch_config_dump_path)) 46 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 47 | f.write(config.to_json_string()) 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | # Required parameters 53 | parser.add_argument( 54 | "--openai_checkpoint_folder_path", 55 | default=None, 56 | type=str, 57 | required=True, 58 | help="Path to the TensorFlow checkpoint path.", 59 | ) 60 | parser.add_argument( 61 | "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 62 | ) 63 | parser.add_argument( 64 | "--openai_config_file", 65 | default="", 66 | type=str, 67 | help="An optional config json file corresponding to the pre-trained OpenAI model. \n" 68 | "This specifies the model architecture.", 69 | ) 70 | args = parser.parse_args() 71 | convert_openai_checkpoint_to_pytorch( 72 | args.openai_checkpoint_folder_path, args.openai_config_file, args.pytorch_dump_folder_path 73 | ) 74 | -------------------------------------------------------------------------------- /transformers/benchmark/benchmark_args.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import logging 18 | from dataclasses import dataclass, field 19 | from typing import Tuple 20 | 21 | from ..file_utils import cached_property, is_torch_available, is_torch_tpu_available, torch_required 22 | from .benchmark_args_utils import BenchmarkArguments 23 | 24 | 25 | if is_torch_available(): 26 | import torch 27 | 28 | if is_torch_tpu_available(): 29 | import torch_xla.core.xla_model as xm 30 | 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | 35 | @dataclass 36 | class PyTorchBenchmarkArguments(BenchmarkArguments): 37 | torchscript: bool = field(default=False, metadata={"help": "Trace the models using torchscript"}) 38 | torch_xla_tpu_print_metrics: bool = field(default=False, metadata={"help": "Print Xla/PyTorch tpu metrics"}) 39 | fp16_opt_level: str = field( 40 | default="O1", 41 | metadata={ 42 | "help": ( 43 | "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 44 | "See details at https://nvidia.github.io/apex/amp.html" 45 | ) 46 | }, 47 | ) 48 | 49 | @cached_property 50 | @torch_required 51 | def _setup_devices(self) -> Tuple["torch.device", int]: 52 | logger.info("PyTorch: setting up devices") 53 | if self.no_cuda: 54 | device = torch.device("cpu") 55 | n_gpu = 0 56 | elif is_torch_tpu_available(): 57 | device = xm.xla_device() 58 | n_gpu = 0 59 | else: 60 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 61 | n_gpu = torch.cuda.device_count() 62 | return device, n_gpu 63 | 64 | @property 65 | def is_tpu(self): 66 | return is_torch_tpu_available() and not self.no_tpu 67 | 68 | @property 69 | @torch_required 70 | def device_idx(self) -> int: 71 | # TODO(PVP): currently only single GPU is supported 72 | return torch.cuda.current_device() 73 | 74 | @property 75 | @torch_required 76 | def device(self) -> "torch.device": 77 | return self._setup_devices[0] 78 | 79 | @property 80 | @torch_required 81 | def n_gpu(self): 82 | return self._setup_devices[1] 83 | 84 | @property 85 | def is_gpu(self): 86 | return self.n_gpu > 0 87 | -------------------------------------------------------------------------------- /transformers/convert_electra_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
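# Rough sketch of the PyTorch benchmark arguments above (model names, batch
# sizes and sequence lengths are illustrative; those fields are assumed to come
# from the shared BenchmarkArguments base class in benchmark_args_utils.py, as
# in upstream transformers 3.0.0).
from transformers.benchmark.benchmark_args import PyTorchBenchmarkArguments

bench_args = PyTorchBenchmarkArguments(
    models=["bert-base-uncased"], batch_sizes=[1], sequence_lengths=[128]
)
print(bench_args.device, bench_args.n_gpu, bench_args.is_tpu)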
15 | """Convert ELECTRA checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | 21 | import torch 22 | 23 | from transformers import ElectraConfig, ElectraForMaskedLM, ElectraForPreTraining, load_tf_weights_in_electra 24 | 25 | 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | 29 | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, discriminator_or_generator): 30 | # Initialise PyTorch model 31 | config = ElectraConfig.from_json_file(config_file) 32 | print("Building PyTorch model from configuration: {}".format(str(config))) 33 | 34 | if discriminator_or_generator == "discriminator": 35 | model = ElectraForPreTraining(config) 36 | elif discriminator_or_generator == "generator": 37 | model = ElectraForMaskedLM(config) 38 | else: 39 | raise ValueError("The discriminator_or_generator argument should be either 'discriminator' or 'generator'") 40 | 41 | # Load weights from tf checkpoint 42 | load_tf_weights_in_electra( 43 | model, config, tf_checkpoint_path, discriminator_or_generator=discriminator_or_generator 44 | ) 45 | 46 | # Save pytorch-model 47 | print("Save PyTorch model to {}".format(pytorch_dump_path)) 48 | torch.save(model.state_dict(), pytorch_dump_path) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | # Required parameters 54 | parser.add_argument( 55 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 56 | ) 57 | parser.add_argument( 58 | "--config_file", 59 | default=None, 60 | type=str, 61 | required=True, 62 | help="The config json file corresponding to the pre-trained model. \n" 63 | "This specifies the model architecture.", 64 | ) 65 | parser.add_argument( 66 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 67 | ) 68 | parser.add_argument( 69 | "--discriminator_or_generator", 70 | default=None, 71 | type=str, 72 | required=True, 73 | help="Whether to export the generator or the discriminator. Should be a string, either 'discriminator' or " 74 | "'generator'.", 75 | ) 76 | args = parser.parse_args() 77 | convert_tf_checkpoint_to_pytorch( 78 | args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path, args.discriminator_or_generator 79 | ) 80 | -------------------------------------------------------------------------------- /transformers/data/datasets/dcn_language_modeling.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import time 5 | 6 | import torch 7 | from filelock import FileLock 8 | from torch.utils.data.dataset import Dataset 9 | 10 | from ...tokenization_utils import PreTrainedTokenizer 11 | import random 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class PinyinShuffleLineByLineTextDataset(Dataset): 17 | """ 18 | This will be superseded by a framework-agnostic approach 19 | soon. 
20 | """ 21 | def __init__(self, 22 | tokenizer: PreTrainedTokenizer, 23 | file_path: str, 24 | block_size: int, 25 | shuffle=True): 26 | shuffle = False 27 | print(file_path, os.path.isfile(file_path)) 28 | assert os.path.isfile(file_path) 29 | # Here, we do not cache the features, operating under the assumption 30 | # that we will soon use fast multithreaded tokenizers from the 31 | # `tokenizers` repo everywhere =) 32 | logger.info("Creating features from dataset file at %s", file_path) 33 | 34 | self.count = 0 35 | self.file_path = file_path 36 | self.block_size = block_size 37 | self.tokenizer = tokenizer 38 | with open(file_path, encoding="utf-8") as f: 39 | for line in f: 40 | line = line.strip() 41 | if len(line.split('\t')) >= 4: 42 | self.count += 1 43 | self.input_file = open(file_path, encoding="utf-8") 44 | self.lines = self.input_file.readlines() 45 | if shuffle: 46 | random.shuffle(self.lines) 47 | 48 | def __len__(self): 49 | return self.count 50 | 51 | def __getitem__(self, i) -> torch.Tensor: 52 | block_size = self.block_size 53 | line = self.lines[i] 54 | line = line.strip() 55 | 56 | line_input, line_label, line_mask, line_pos = line.split('\t')[:4] 57 | line_input_items = line_input.split() 58 | line_label_items = line_label.split() 59 | line_mask_items = line_mask.split() 60 | line_pos_items = line_pos.split() 61 | assert len(line_input_items) == len(line_label_items) == len( 62 | line_mask_items) == len(line_pos_items) 63 | input_ids = self.tokenizer.convert_tokens_to_ids( 64 | ["[CLS]"] + line_input_items[:block_size - 2] + ["[SEP]"]) 65 | label_ids = self.tokenizer.convert_tokens_to_ids( 66 | ["[CLS]"] + line_label_items[:block_size - 2] + ["[SEP]"]) 67 | 68 | mask_ids = [0] + [ 69 | int(item) for item in line_mask_items[:block_size - 2] 70 | ] + [0] 71 | pos_ids = [0 72 | ] + [int(item) 73 | for item in line_pos_items[:block_size - 2]] + [0] 74 | assert len(input_ids) == len(label_ids) == len(mask_ids) == len( 75 | pos_ids) 76 | return torch.tensor(input_ids, dtype=torch.long), torch.tensor( 77 | label_ids, dtype=torch.long), torch.tensor( 78 | mask_ids, dtype=torch.long), torch.tensor(pos_ids, 79 | dtype=torch.long) 80 | 81 | -------------------------------------------------------------------------------- /transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert OpenAI GPT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import json 20 | import logging 21 | 22 | import numpy 23 | import torch 24 | 25 | from transformers import CONFIG_NAME, WEIGHTS_NAME 26 | from transformers.tokenization_xlm import VOCAB_FILES_NAMES 27 | 28 | 29 | logging.basicConfig(level=logging.INFO) 30 | 31 | 32 | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path): 33 | # Load checkpoint 34 | chkpt = torch.load(xlm_checkpoint_path, map_location="cpu") 35 | 36 | state_dict = chkpt["model"] 37 | 38 | # We have the base model one level deeper than the original XLM repository 39 | two_levels_state_dict = {} 40 | for k, v in state_dict.items(): 41 | if "pred_layer" in k: 42 | two_levels_state_dict[k] = v 43 | else: 44 | two_levels_state_dict["transformer." + k] = v 45 | 46 | config = chkpt["params"] 47 | config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray))) 48 | 49 | vocab = chkpt["dico_word2id"] 50 | vocab = dict((s + "" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""), i) for s, i in vocab.items()) 51 | 52 | # Save pytorch-model 53 | pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME 54 | pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME 55 | pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["vocab_file"] 56 | 57 | print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) 58 | torch.save(two_levels_state_dict, pytorch_weights_dump_path) 59 | 60 | print("Save configuration file to {}".format(pytorch_config_dump_path)) 61 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 62 | f.write(json.dumps(config, indent=2) + "\n") 63 | 64 | print("Save vocab file to {}".format(pytorch_config_dump_path)) 65 | with open(pytorch_vocab_dump_path, "w", encoding="utf-8") as f: 66 | f.write(json.dumps(vocab, indent=2) + "\n") 67 | 68 | 69 | if __name__ == "__main__": 70 | parser = argparse.ArgumentParser() 71 | # Required parameters 72 | parser.add_argument( 73 | "--xlm_checkpoint_path", default=None, type=str, required=True, help="Path the official PyTorch dump." 74 | ) 75 | parser.add_argument( 76 | "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 77 | ) 78 | args = parser.parse_args() 79 | convert_xlm_checkpoint_to_pytorch(args.xlm_checkpoint_path, args.pytorch_dump_folder_path) 80 | -------------------------------------------------------------------------------- /transformers/data/processors/xnli.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """ XNLI utils (dataset loading and evaluation) """ 17 | 18 | 19 | import logging 20 | import os 21 | 22 | from .utils import DataProcessor, InputExample 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class XnliProcessor(DataProcessor): 29 | """Processor for the XNLI dataset. 30 | Adapted from https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/run_classifier.py#L207""" 31 | 32 | def __init__(self, language, train_language=None): 33 | self.language = language 34 | self.train_language = train_language 35 | 36 | def get_train_examples(self, data_dir): 37 | """See base class.""" 38 | lg = self.language if self.train_language is None else self.train_language 39 | lines = self._read_tsv(os.path.join(data_dir, "XNLI-MT-1.0/multinli/multinli.train.{}.tsv".format(lg))) 40 | examples = [] 41 | for (i, line) in enumerate(lines): 42 | if i == 0: 43 | continue 44 | guid = "%s-%s" % ("train", i) 45 | text_a = line[0] 46 | text_b = line[1] 47 | label = "contradiction" if line[2] == "contradictory" else line[2] 48 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 49 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 50 | return examples 51 | 52 | def get_test_examples(self, data_dir): 53 | """See base class.""" 54 | lines = self._read_tsv(os.path.join(data_dir, "XNLI-1.0/xnli.test.tsv")) 55 | examples = [] 56 | for (i, line) in enumerate(lines): 57 | if i == 0: 58 | continue 59 | language = line[0] 60 | if language != self.language: 61 | continue 62 | guid = "%s-%s" % ("test", i) 63 | text_a = line[6] 64 | text_b = line[7] 65 | label = line[1] 66 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 67 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 68 | return examples 69 | 70 | def get_labels(self): 71 | """See base class.""" 72 | return ["contradiction", "entailment", "neutral"] 73 | 74 | 75 | xnli_processors = { 76 | "xnli": XnliProcessor, 77 | } 78 | 79 | xnli_output_modes = { 80 | "xnli": "classification", 81 | } 82 | 83 | xnli_tasks_num_labels = { 84 | "xnli": 3, 85 | } 86 | -------------------------------------------------------------------------------- /transformers/data/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | try: 18 | from scipy.stats import pearsonr, spearmanr 19 | from sklearn.metrics import matthews_corrcoef, f1_score 20 | 21 | _has_sklearn = True 22 | except (AttributeError, ImportError): 23 | _has_sklearn = False 24 | 25 | 26 | def is_sklearn_available(): 27 | return _has_sklearn 28 | 29 | 30 | if _has_sklearn: 31 | 32 | def simple_accuracy(preds, labels): 33 | return (preds == labels).mean() 34 | 35 | def acc_and_f1(preds, labels): 36 | acc = simple_accuracy(preds, labels) 37 | f1 = f1_score(y_true=labels, y_pred=preds) 38 | return { 39 | "acc": acc, 40 | "f1": f1, 41 | "acc_and_f1": (acc + f1) / 2, 42 | } 43 | 44 | def pearson_and_spearman(preds, labels): 45 | pearson_corr = pearsonr(preds, labels)[0] 46 | spearman_corr = spearmanr(preds, labels)[0] 47 | return { 48 | "pearson": pearson_corr, 49 | "spearmanr": spearman_corr, 50 | "corr": (pearson_corr + spearman_corr) / 2, 51 | } 52 | 53 | def glue_compute_metrics(task_name, preds, labels): 54 | assert len(preds) == len(labels) 55 | if task_name == "cola": 56 | return {"mcc": matthews_corrcoef(labels, preds)} 57 | elif task_name == "sst-2": 58 | return {"acc": simple_accuracy(preds, labels)} 59 | elif task_name == "mrpc": 60 | return acc_and_f1(preds, labels) 61 | elif task_name == "sts-b": 62 | return pearson_and_spearman(preds, labels) 63 | elif task_name == "qqp": 64 | return acc_and_f1(preds, labels) 65 | elif task_name == "mnli": 66 | return {"mnli/acc": simple_accuracy(preds, labels)} 67 | elif task_name == "mnli-mm": 68 | return {"mnli-mm/acc": simple_accuracy(preds, labels)} 69 | elif task_name == "qnli": 70 | return {"acc": simple_accuracy(preds, labels)} 71 | elif task_name == "rte": 72 | return {"acc": simple_accuracy(preds, labels)} 73 | elif task_name == "wnli": 74 | return {"acc": simple_accuracy(preds, labels)} 75 | elif task_name == "hans": 76 | return {"acc": simple_accuracy(preds, labels)} 77 | else: 78 | raise KeyError(task_name) 79 | 80 | def xnli_compute_metrics(task_name, preds, labels): 81 | assert len(preds) == len(labels) 82 | if task_name == "xnli": 83 | return {"acc": simple_accuracy(preds, labels)} 84 | else: 85 | raise KeyError(task_name) 86 | -------------------------------------------------------------------------------- /transformers/convert_longformer_original_pytorch_lightning_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
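# Quick sketch of the GLUE metric helpers above (they are only defined when
# scipy and scikit-learn are importable, hence the availability check; the
# arrays are toy predictions).
import numpy as np
from transformers.data import metrics

if metrics.is_sklearn_available():
    preds = np.array([1, 0, 1, 1])
    labels = np.array([1, 0, 0, 1])
    print(metrics.glue_compute_metrics("mrpc", preds, labels))  # acc, f1 and their mean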
15 | """Convert RoBERTa checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | import pytorch_lightning as pl 21 | import torch 22 | 23 | from transformers.modeling_longformer import LongformerForQuestionAnswering, LongformerModel 24 | 25 | 26 | class LightningModel(pl.LightningModule): 27 | def __init__(self, model): 28 | super().__init__() 29 | self.model = model 30 | self.num_labels = 2 31 | self.qa_outputs = torch.nn.Linear(self.model.config.hidden_size, self.num_labels) 32 | 33 | # implement only because lighning requires to do so 34 | def forward(self): 35 | pass 36 | 37 | 38 | def convert_longformer_qa_checkpoint_to_pytorch( 39 | longformer_model: str, longformer_question_answering_ckpt_path: str, pytorch_dump_folder_path: str 40 | ): 41 | 42 | # load longformer model from model identifier 43 | longformer = LongformerModel.from_pretrained(longformer_model) 44 | lightning_model = LightningModel(longformer) 45 | 46 | ckpt = torch.load(longformer_question_answering_ckpt_path, map_location=torch.device("cpu")) 47 | lightning_model.load_state_dict(ckpt["state_dict"]) 48 | 49 | # init longformer question answering model 50 | longformer_for_qa = LongformerForQuestionAnswering.from_pretrained(longformer_model) 51 | 52 | # transfer weights 53 | longformer_for_qa.longformer.load_state_dict(lightning_model.model.state_dict()) 54 | longformer_for_qa.qa_outputs.load_state_dict(lightning_model.qa_outputs.state_dict()) 55 | longformer_for_qa.eval() 56 | 57 | # save model 58 | longformer_for_qa.save_pretrained(pytorch_dump_folder_path) 59 | 60 | print("Conversion succesful. Model saved under {}".format(pytorch_dump_folder_path)) 61 | 62 | 63 | if __name__ == "__main__": 64 | parser = argparse.ArgumentParser() 65 | # Required parameters 66 | parser.add_argument( 67 | "--longformer_model", 68 | default=None, 69 | type=str, 70 | required=True, 71 | help="model identifier of longformer. Should be either `longformer-base-4096` or `longformer-large-4096`.", 72 | ) 73 | parser.add_argument( 74 | "--longformer_question_answering_ckpt_path", 75 | default=None, 76 | type=str, 77 | required=True, 78 | help="Path the official PyTorch Lighning Checkpoint.", 79 | ) 80 | parser.add_argument( 81 | "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 82 | ) 83 | args = parser.parse_args() 84 | convert_longformer_qa_checkpoint_to_pytorch( 85 | args.longformer_model, args.longformer_question_answering_ckpt_path, args.pytorch_dump_folder_path 86 | ) 87 | -------------------------------------------------------------------------------- /transformers/configuration_longformer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Allen Institute for AI team and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ Longformer configuration """ 16 | 17 | import logging 18 | from typing import List, Union 19 | 20 | from .configuration_roberta import RobertaConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "allenai/longformer-base-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096/config.json", 27 | "allenai/longformer-large-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096/config.json", 28 | "allenai/longformer-large-4096-finetuned-triviaqa": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-finetuned-triviaqa/config.json", 29 | "allenai/longformer-base-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096-extra.pos.embd.only/config.json", 30 | "allenai/longformer-large-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-extra.pos.embd.only/config.json", 31 | } 32 | 33 | 34 | class LongformerConfig(RobertaConfig): 35 | r""" 36 | This is the configuration class to store the configuration of a :class:`~transformers.LongformerModel`. 37 | It is used to instantiate an Longformer model according to the specified arguments, defining the model 38 | architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of 39 | the RoBERTa `roberta-base `__ architecture with a sequence length 4,096. 40 | 41 | The :class:`~transformers.LongformerConfig` class directly inherits :class:`~transformers.RobertaConfig`. 42 | It reuses the same defaults. Please check the parent class for more information. 43 | 44 | Args: 45 | attention_window (:obj:`int` or :obj:`List[int]`, optional, defaults to 512): 46 | Size of an attention window around each token. If :obj:`int`, use the same size for all layers. 47 | To specify a different window size for each layer, use a :obj:`List[int]` where 48 | ``len(attention_window) == num_hidden_layers``. 49 | 50 | Example:: 51 | 52 | >>> from transformers import LongformerConfig, LongformerModel 53 | 54 | >>> # Initializing a Longformer configuration 55 | >>> configuration = LongformerConfig() 56 | 57 | >>> # Initializing a model from the configuration 58 | >>> model = LongformerModel(configuration) 59 | 60 | >>> # Accessing the model configuration 61 | >>> configuration = model.config 62 | """ 63 | model_type = "longformer" 64 | 65 | def __init__(self, attention_window: Union[List[int], int] = 512, sep_token_id: int = 2, **kwargs): 66 | super().__init__(**kwargs) 67 | self.attention_window = attention_window 68 | self.sep_token_id = sep_token_id 69 | -------------------------------------------------------------------------------- /transformers/configuration_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ RoBERTa configuration """ 17 | 18 | import logging 19 | 20 | from .configuration_bert import BertConfig 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { 25 | "roberta-base": 26 | "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json", 27 | "roberta-large": 28 | "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json", 29 | "roberta-large-mnli": 30 | "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json", 31 | "distilroberta-base": 32 | "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-config.json", 33 | "roberta-base-openai-detector": 34 | "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-config.json", 35 | "roberta-large-openai-detector": 36 | "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-config.json", 37 | } 38 | 39 | 40 | class RobertaConfig(BertConfig): 41 | r""" 42 | This is the configuration class to store the configuration of a :class:`~transformers.RobertaModel`. 43 | It is used to instantiate an RoBERTa model according to the specified arguments, defining the model 44 | architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of 45 | the BERT `bert-base-uncased `__ architecture. 46 | 47 | Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used 48 | to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig` 49 | for more information. 50 | 51 | The :class:`~transformers.RobertaConfig` class directly inherits :class:`~transformers.BertConfig`. 52 | It reuses the same defaults. Please check the parent class for more information. 53 | 54 | Example:: 55 | 56 | >>> from transformers import RobertaConfig, RobertaModel 57 | 58 | >>> # Initializing a RoBERTa configuration 59 | >>> configuration = RobertaConfig() 60 | 61 | >>> # Initializing a model from the configuration 62 | >>> model = RobertaModel(configuration) 63 | 64 | >>> # Accessing the model configuration 65 | >>> configuration = model.config 66 | """ 67 | model_type = "roberta" 68 | n_autoregressive = 3 69 | is_resnet = True 70 | 71 | def __init__(self, 72 | pad_token_id=1, 73 | bos_token_id=0, 74 | eos_token_id=2, 75 | **kwargs): 76 | """Constructs RobertaConfig. 
77 | """ 78 | super().__init__(pad_token_id=pad_token_id, 79 | bos_token_id=bos_token_id, 80 | eos_token_id=eos_token_id, 81 | **kwargs) 82 | -------------------------------------------------------------------------------- /vocab/pinyin_vocab.txt: -------------------------------------------------------------------------------- 1 | [OTHER] 2 | a 3 | ai 4 | an 5 | ang 6 | ao 7 | ba 8 | bai 9 | ban 10 | bang 11 | bao 12 | bei 13 | ben 14 | beng 15 | bi 16 | bian 17 | biao 18 | bie 19 | bin 20 | bing 21 | bo 22 | bu 23 | ca 24 | cai 25 | can 26 | cang 27 | cao 28 | ce 29 | cen 30 | ceng 31 | cha 32 | chai 33 | chan 34 | chang 35 | chao 36 | che 37 | chen 38 | cheng 39 | chi 40 | chong 41 | chou 42 | chu 43 | chuai 44 | chuan 45 | chuang 46 | chui 47 | chun 48 | chuo 49 | ci 50 | cong 51 | cou 52 | cu 53 | cuan 54 | cui 55 | cun 56 | cuo 57 | da 58 | dai 59 | dan 60 | dang 61 | dao 62 | de 63 | deng 64 | di 65 | dian 66 | diao 67 | die 68 | ding 69 | diu 70 | dong 71 | dou 72 | du 73 | duan 74 | dui 75 | dun 76 | duo 77 | e 78 | ei 79 | en 80 | er 81 | fa 82 | fan 83 | fang 84 | fei 85 | fen 86 | feng 87 | fo 88 | fou 89 | fu 90 | ga 91 | gai 92 | gan 93 | gang 94 | gao 95 | ge 96 | gei 97 | gen 98 | geng 99 | gong 100 | gou 101 | gu 102 | gua 103 | guai 104 | guan 105 | guang 106 | gui 107 | gun 108 | guo 109 | ha 110 | hai 111 | han 112 | hang 113 | hao 114 | he 115 | hei 116 | hen 117 | heng 118 | hong 119 | hou 120 | hu 121 | hua 122 | huai 123 | huan 124 | huang 125 | hui 126 | hun 127 | huo 128 | ji 129 | jia 130 | jian 131 | jiang 132 | jiao 133 | jie 134 | jin 135 | jing 136 | jiong 137 | jiu 138 | ju 139 | juan 140 | jue 141 | jun 142 | ka 143 | kai 144 | kan 145 | kang 146 | kao 147 | ke 148 | ken 149 | keng 150 | kong 151 | kou 152 | ku 153 | kua 154 | kuai 155 | kuan 156 | kuang 157 | kui 158 | kun 159 | kuo 160 | la 161 | lai 162 | lan 163 | lang 164 | lao 165 | le 166 | lei 167 | leng 168 | li 169 | lia 170 | lian 171 | liang 172 | liao 173 | lie 174 | lin 175 | ling 176 | liu 177 | long 178 | lou 179 | lu 180 | luan 181 | lun 182 | luo 183 | lv 184 | lve 185 | ma 186 | mai 187 | man 188 | mang 189 | mao 190 | me 191 | mei 192 | men 193 | meng 194 | mi 195 | mian 196 | miao 197 | mie 198 | min 199 | ming 200 | miu 201 | mo 202 | mou 203 | mu 204 | n 205 | na 206 | nai 207 | nan 208 | nang 209 | nao 210 | ne 211 | nei 212 | nen 213 | neng 214 | ni 215 | nian 216 | niang 217 | niao 218 | nie 219 | nin 220 | ning 221 | niu 222 | nong 223 | nou 224 | nu 225 | nuan 226 | nuo 227 | nv 228 | nve 229 | o 230 | ou 231 | pa 232 | pai 233 | pan 234 | pang 235 | pao 236 | pei 237 | pen 238 | peng 239 | pi 240 | pian 241 | piao 242 | pie 243 | pin 244 | ping 245 | po 246 | pou 247 | pu 248 | qi 249 | qia 250 | qian 251 | qiang 252 | qiao 253 | qie 254 | qin 255 | qing 256 | qiong 257 | qiu 258 | qu 259 | quan 260 | que 261 | qun 262 | ran 263 | rang 264 | rao 265 | re 266 | ren 267 | reng 268 | ri 269 | rong 270 | rou 271 | ru 272 | ruan 273 | rui 274 | run 275 | ruo 276 | sa 277 | sai 278 | san 279 | sang 280 | sao 281 | se 282 | sen 283 | seng 284 | sha 285 | shai 286 | shan 287 | shang 288 | shao 289 | she 290 | shei 291 | shen 292 | sheng 293 | shi 294 | shou 295 | shu 296 | shua 297 | shuai 298 | shuan 299 | shuang 300 | shui 301 | shun 302 | shuo 303 | si 304 | song 305 | sou 306 | su 307 | suan 308 | sui 309 | sun 310 | suo 311 | ta 312 | tai 313 | tan 314 | tang 315 | tao 316 | te 317 | teng 318 | ti 319 | tian 320 | tiao 321 | tie 322 | ting 323 | tong 324 | tou 325 | tu 326 | tuan 
327 | tui 328 | tun 329 | tuo 330 | wa 331 | wai 332 | wan 333 | wang 334 | wei 335 | wen 336 | weng 337 | wo 338 | wu 339 | xi 340 | xia 341 | xian 342 | xiang 343 | xiao 344 | xie 345 | xin 346 | xing 347 | xiong 348 | xiu 349 | xu 350 | xuan 351 | xue 352 | xun 353 | ya 354 | yan 355 | yang 356 | yao 357 | ye 358 | yi 359 | yin 360 | ying 361 | yo 362 | yong 363 | you 364 | yu 365 | yuan 366 | yue 367 | yun 368 | za 369 | zai 370 | zan 371 | zang 372 | zao 373 | ze 374 | zei 375 | zen 376 | zeng 377 | zha 378 | zhai 379 | zhan 380 | zhang 381 | zhao 382 | zhe 383 | zhen 384 | zheng 385 | zhi 386 | zhong 387 | zhou 388 | zhu 389 | zhua 390 | zhuai 391 | zhuan 392 | zhuang 393 | zhui 394 | zhun 395 | zhuo 396 | zi 397 | zong 398 | zou 399 | zu 400 | zuan 401 | zui 402 | zun 403 | zuo 404 | den 405 | -------------------------------------------------------------------------------- /transformers/commands/run.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from argparse import ArgumentParser 3 | 4 | from transformers.commands import BaseTransformersCLICommand 5 | from transformers.pipelines import SUPPORTED_TASKS, Pipeline, PipelineDataFormat, pipeline 6 | 7 | 8 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 9 | 10 | 11 | def try_infer_format_from_ext(path: str): 12 | if not path: 13 | return "pipe" 14 | 15 | for ext in PipelineDataFormat.SUPPORTED_FORMATS: 16 | if path.endswith(ext): 17 | return ext 18 | 19 | raise Exception( 20 | "Unable to determine file format from file extension {}. " 21 | "Please provide the format through --format {}".format(path, PipelineDataFormat.SUPPORTED_FORMATS) 22 | ) 23 | 24 | 25 | def run_command_factory(args): 26 | nlp = pipeline( 27 | task=args.task, 28 | model=args.model if args.model else None, 29 | config=args.config, 30 | tokenizer=args.tokenizer, 31 | device=args.device, 32 | ) 33 | format = try_infer_format_from_ext(args.input) if args.format == "infer" else args.format 34 | reader = PipelineDataFormat.from_str( 35 | format=format, 36 | output_path=args.output, 37 | input_path=args.input, 38 | column=args.column if args.column else nlp.default_input_names, 39 | overwrite=args.overwrite, 40 | ) 41 | return RunCommand(nlp, reader) 42 | 43 | 44 | class RunCommand(BaseTransformersCLICommand): 45 | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat): 46 | self._nlp = nlp 47 | self._reader = reader 48 | 49 | @staticmethod 50 | def register_subcommand(parser: ArgumentParser): 51 | run_parser = parser.add_parser("run", help="Run a pipeline through the CLI") 52 | run_parser.add_argument("--task", choices=SUPPORTED_TASKS.keys(), help="Task to run") 53 | run_parser.add_argument("--input", type=str, help="Path to the file to use for inference") 54 | run_parser.add_argument("--output", type=str, help="Path to the file that will be used post to write results.") 55 | run_parser.add_argument("--model", type=str, help="Name or path to the model to instantiate.") 56 | run_parser.add_argument("--config", type=str, help="Name or path to the model's config to instantiate.") 57 | run_parser.add_argument( 58 | "--tokenizer", type=str, help="Name of the tokenizer to use. (default: same as the model name)" 59 | ) 60 | run_parser.add_argument( 61 | "--column", 62 | type=str, 63 | help="Name of the column to use as input. 
(For multi columns input as QA use column1,columns2)", 64 | ) 65 | run_parser.add_argument( 66 | "--format", 67 | type=str, 68 | default="infer", 69 | choices=PipelineDataFormat.SUPPORTED_FORMATS, 70 | help="Input format to read from", 71 | ) 72 | run_parser.add_argument( 73 | "--device", 74 | type=int, 75 | default=-1, 76 | help="Indicate the device to run onto, -1 indicates CPU, >= 0 indicates GPU (default: -1)", 77 | ) 78 | run_parser.add_argument("--overwrite", action="store_true", help="Allow overwriting the output file.") 79 | run_parser.set_defaults(func=run_command_factory) 80 | 81 | def run(self): 82 | nlp, outputs = self._nlp, [] 83 | 84 | for entry in self._reader: 85 | output = nlp(**entry) if self._reader.is_multi_columns else nlp(entry) 86 | if isinstance(output, dict): 87 | outputs.append(output) 88 | else: 89 | outputs += output 90 | 91 | # Saving data 92 | if self._nlp.binary_output: 93 | binary_path = self._reader.save_binary(outputs) 94 | logger.warning("Current pipeline requires output to be in binary format, saving at {}".format(binary_path)) 95 | else: 96 | self._reader.save(outputs) 97 | -------------------------------------------------------------------------------- /transformers/tokenization_electra.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Team, Stanford University and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
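# Rough sketch of what the `run` command above wires together when used
# programmatically (task and input are illustrative; the default model for the
# task is downloaded on first use, and device=-1 keeps inference on CPU as in
# the command's own default).
from transformers.pipelines import pipeline

nlp = pipeline(task="sentiment-analysis", device=-1)
print(nlp("This fix looks good to me."))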
15 | 16 | from .tokenization_bert import BertTokenizer, BertTokenizerFast 17 | 18 | 19 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 20 | 21 | PRETRAINED_VOCAB_FILES_MAP = { 22 | "vocab_file": { 23 | "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/vocab.txt", 24 | "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/vocab.txt", 25 | "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/vocab.txt", 26 | "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/vocab.txt", 27 | "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/vocab.txt", 28 | "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/vocab.txt", 29 | } 30 | } 31 | 32 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 33 | "google/electra-small-generator": 512, 34 | "google/electra-base-generator": 512, 35 | "google/electra-large-generator": 512, 36 | "google/electra-small-discriminator": 512, 37 | "google/electra-base-discriminator": 512, 38 | "google/electra-large-discriminator": 512, 39 | } 40 | 41 | 42 | PRETRAINED_INIT_CONFIGURATION = { 43 | "google/electra-small-generator": {"do_lower_case": True}, 44 | "google/electra-base-generator": {"do_lower_case": True}, 45 | "google/electra-large-generator": {"do_lower_case": True}, 46 | "google/electra-small-discriminator": {"do_lower_case": True}, 47 | "google/electra-base-discriminator": {"do_lower_case": True}, 48 | "google/electra-large-discriminator": {"do_lower_case": True}, 49 | } 50 | 51 | 52 | class ElectraTokenizer(BertTokenizer): 53 | r""" 54 | Constructs an Electra tokenizer. 55 | :class:`~transformers.ElectraTokenizer` is identical to :class:`~transformers.BertTokenizer` and runs end-to-end 56 | tokenization: punctuation splitting + wordpiece. 57 | 58 | Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning 59 | parameters. 60 | """ 61 | 62 | vocab_files_names = VOCAB_FILES_NAMES 63 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 64 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 65 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 66 | 67 | 68 | class ElectraTokenizerFast(BertTokenizerFast): 69 | r""" 70 | Constructs a "Fast" Electra Fast tokenizer (backed by HuggingFace's `tokenizers` library). 71 | 72 | :class:`~transformers.ElectraTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end 73 | tokenization: punctuation splitting + wordpiece. 74 | 75 | Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning 76 | parameters. 77 | """ 78 | vocab_files_names = VOCAB_FILES_NAMES 79 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 80 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 81 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 82 | -------------------------------------------------------------------------------- /transformers/benchmark/benchmark_args_tf.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. 
All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import logging 18 | from dataclasses import dataclass, field 19 | from typing import Tuple 20 | 21 | from ..file_utils import cached_property, is_tf_available, tf_required 22 | from .benchmark_args_utils import BenchmarkArguments 23 | 24 | 25 | if is_tf_available(): 26 | import tensorflow as tf 27 | 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | @dataclass 33 | class TensorFlowBenchmarkArguments(BenchmarkArguments): 34 | tpu_name: str = field( 35 | default=None, metadata={"help": "Name of TPU"}, 36 | ) 37 | device_idx: int = field( 38 | default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."}, 39 | ) 40 | eager_mode: bool = field(default=False, metadata={"help": "Benchmark models in eager model."}) 41 | use_xla: bool = field( 42 | default=False, 43 | metadata={ 44 | "help": "Benchmark models using XLA JIT compilation. Note that `eager_model` has to be set to `False`." 45 | }, 46 | ) 47 | 48 | @cached_property 49 | @tf_required 50 | def _setup_tpu(self) -> Tuple["tf.distribute.cluster_resolver.TPUClusterResolver"]: 51 | if not self.no_tpu: 52 | try: 53 | if self.tpu_name: 54 | tpu = tf.distribute.cluster_resolver.TPUClusterResolver(self.tpu_name) 55 | else: 56 | tpu = tf.distribute.cluster_resolver.TPUClusterResolver() 57 | except ValueError: 58 | tpu = None 59 | return tpu 60 | 61 | @cached_property 62 | @tf_required 63 | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", "tf.distribute.cluster_resolver.TPUClusterResolver"]: 64 | if self.is_tpu: 65 | tf.config.experimental_connect_to_cluster(self._setup_tpu) 66 | tf.tpu.experimental.initialize_tpu_system(self._setup_tpu) 67 | 68 | strategy = tf.distribute.experimental.TPUStrategy(self._setup_tpu) 69 | else: 70 | # currently no multi gpu is allowed 71 | if self.is_gpu: 72 | # TODO: Currently only single GPU is supported 73 | tf.config.experimental.set_visible_devices(self.gpu_list[self.device_idx], "GPU") 74 | strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}") 75 | else: 76 | tf.config.experimental.set_visible_devices([], "GPU") # disable GPU 77 | strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}") 78 | 79 | return strategy 80 | 81 | @property 82 | @tf_required 83 | def is_tpu(self) -> bool: 84 | return self._setup_tpu is not None 85 | 86 | @property 87 | @tf_required 88 | def strategy(self) -> "tf.distribute.Strategy": 89 | return self._setup_strategy 90 | 91 | @property 92 | @tf_required 93 | def gpu_list(self): 94 | return tf.config.list_physical_devices("GPU") 95 | 96 | @property 97 | @tf_required 98 | def n_gpu(self) -> int: 99 | if not self.no_cuda: 100 | return len(self.gpu_list) 101 | return 0 102 | 103 | @property 104 | def is_gpu(self) -> bool: 105 | return self.n_gpu > 0 106 | -------------------------------------------------------------------------------- /transformers/tokenization_distilbert.py: 
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Tokenization classes for DistilBERT.""" 16 | 17 | 18 | import logging 19 | 20 | from .tokenization_bert import BertTokenizer, BertTokenizerFast 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 26 | 27 | PRETRAINED_VOCAB_FILES_MAP = { 28 | "vocab_file": { 29 | "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", 30 | "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", 31 | "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", 32 | "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", 33 | "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-vocab.txt", 34 | "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", 35 | } 36 | } 37 | 38 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 39 | "distilbert-base-uncased": 512, 40 | "distilbert-base-uncased-distilled-squad": 512, 41 | "distilbert-base-cased": 512, 42 | "distilbert-base-cased-distilled-squad": 512, 43 | "distilbert-base-german-cased": 512, 44 | "distilbert-base-multilingual-cased": 512, 45 | } 46 | 47 | 48 | PRETRAINED_INIT_CONFIGURATION = { 49 | "distilbert-base-uncased": {"do_lower_case": True}, 50 | "distilbert-base-uncased-distilled-squad": {"do_lower_case": True}, 51 | "distilbert-base-cased": {"do_lower_case": False}, 52 | "distilbert-base-cased-distilled-squad": {"do_lower_case": False}, 53 | "distilbert-base-german-cased": {"do_lower_case": False}, 54 | "distilbert-base-multilingual-cased": {"do_lower_case": False}, 55 | } 56 | 57 | 58 | class DistilBertTokenizer(BertTokenizer): 59 | r""" 60 | Constructs a DistilBertTokenizer. 61 | 62 | :class:`~transformers.DistilBertTokenizer is identical to :class:`~transformers.BertTokenizer` and runs end-to-end 63 | tokenization: punctuation splitting + wordpiece. 64 | 65 | Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning 66 | parameters. 67 | """ 68 | 69 | vocab_files_names = VOCAB_FILES_NAMES 70 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 71 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 72 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 73 | model_input_names = ["attention_mask"] 74 | 75 | 76 | class DistilBertTokenizerFast(BertTokenizerFast): 77 | r""" 78 | Constructs a "Fast" DistilBertTokenizer (backed by HuggingFace's `tokenizers` library). 
79 | 80 | :class:`~transformers.DistilBertTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end 81 | tokenization: punctuation splitting + wordpiece. 82 | 83 | Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning 84 | parameters. 85 | """ 86 | 87 | vocab_files_names = VOCAB_FILES_NAMES 88 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 89 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 90 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 91 | model_input_names = ["attention_mask"] 92 | -------------------------------------------------------------------------------- /transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert BERT checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | import os 21 | 22 | import torch 23 | 24 | from transformers import ( 25 | CONFIG_NAME, 26 | WEIGHTS_NAME, 27 | XLNetConfig, 28 | XLNetForQuestionAnswering, 29 | XLNetForSequenceClassification, 30 | XLNetLMHeadModel, 31 | load_tf_weights_in_xlnet, 32 | ) 33 | 34 | 35 | GLUE_TASKS_NUM_LABELS = { 36 | "cola": 2, 37 | "mnli": 3, 38 | "mrpc": 2, 39 | "sst-2": 2, 40 | "sts-b": 1, 41 | "qqp": 2, 42 | "qnli": 2, 43 | "rte": 2, 44 | "wnli": 2, 45 | } 46 | 47 | 48 | logging.basicConfig(level=logging.INFO) 49 | 50 | 51 | def convert_xlnet_checkpoint_to_pytorch( 52 | tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None 53 | ): 54 | # Initialise PyTorch model 55 | config = XLNetConfig.from_json_file(bert_config_file) 56 | 57 | finetuning_task = finetuning_task.lower() if finetuning_task is not None else "" 58 | if finetuning_task in GLUE_TASKS_NUM_LABELS: 59 | print("Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config))) 60 | config.finetuning_task = finetuning_task 61 | config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task] 62 | model = XLNetForSequenceClassification(config) 63 | elif "squad" in finetuning_task: 64 | config.finetuning_task = finetuning_task 65 | model = XLNetForQuestionAnswering(config) 66 | else: 67 | model = XLNetLMHeadModel(config) 68 | 69 | # Load weights from tf checkpoint 70 | load_tf_weights_in_xlnet(model, config, tf_checkpoint_path) 71 | 72 | # Save pytorch-model 73 | pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) 74 | pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME) 75 | print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) 76 | torch.save(model.state_dict(), pytorch_weights_dump_path) 77 | print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) 78 | with open(pytorch_config_dump_path, "w", 
encoding="utf-8") as f: 79 | f.write(config.to_json_string()) 80 | 81 | 82 | if __name__ == "__main__": 83 | parser = argparse.ArgumentParser() 84 | # Required parameters 85 | parser.add_argument( 86 | "--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 87 | ) 88 | parser.add_argument( 89 | "--xlnet_config_file", 90 | default=None, 91 | type=str, 92 | required=True, 93 | help="The config json file corresponding to the pre-trained XLNet model. \n" 94 | "This specifies the model architecture.", 95 | ) 96 | parser.add_argument( 97 | "--pytorch_dump_folder_path", 98 | default=None, 99 | type=str, 100 | required=True, 101 | help="Path to the folder to store the PyTorch model or dataset/vocab.", 102 | ) 103 | parser.add_argument( 104 | "--finetuning_task", 105 | default=None, 106 | type=str, 107 | help="Name of a task on which the XLNet TensorFloaw model was fine-tuned", 108 | ) 109 | args = parser.parse_args() 110 | print(args) 111 | 112 | convert_xlnet_checkpoint_to_pytorch( 113 | args.tf_checkpoint_path, args.xlnet_config_file, args.pytorch_dump_folder_path, args.finetuning_task 114 | ) 115 | -------------------------------------------------------------------------------- /transformers/data/datasets/language_modeling.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import time 5 | 6 | import torch 7 | from filelock import FileLock 8 | from torch.utils.data.dataset import Dataset 9 | 10 | from ...tokenization_utils import PreTrainedTokenizer 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class TextDataset(Dataset): 17 | """ 18 | This will be superseded by a framework-agnostic approach 19 | soon. 20 | """ 21 | 22 | def __init__( 23 | self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False, 24 | ): 25 | assert os.path.isfile(file_path) 26 | 27 | block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False) 28 | 29 | directory, filename = os.path.split(file_path) 30 | cached_features_file = os.path.join( 31 | directory, "cached_lm_{}_{}_{}".format(tokenizer.__class__.__name__, str(block_size), filename,), 32 | ) 33 | 34 | # Make sure only the first process in distributed training processes the dataset, 35 | # and the others will use the cache. 
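        # (Sketch of the intent: the first process to acquire the ".lock" file below builds the features and
        # pickles them into `cached_features_file`; every other process blocks on the lock and then simply
        # loads that pickle, so the expensive tokenization pass runs only once per machine.)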
36 | lock_path = cached_features_file + ".lock" 37 | with FileLock(lock_path): 38 | 39 | if os.path.exists(cached_features_file) and not overwrite_cache: 40 | start = time.time() 41 | with open(cached_features_file, "rb") as handle: 42 | self.examples = pickle.load(handle) 43 | logger.info( 44 | f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start 45 | ) 46 | 47 | else: 48 | logger.info(f"Creating features from dataset file at {directory}") 49 | 50 | self.examples = [] 51 | with open(file_path, encoding="utf-8") as f: 52 | text = f.read() 53 | 54 | tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) 55 | 56 | for i in range(0, len(tokenized_text) - block_size + 1, block_size): # Truncate in block of block_size 57 | self.examples.append( 58 | tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size]) 59 | ) 60 | # Note that we are losing the last truncated example here for the sake of simplicity (no padding) 61 | # If your dataset is small, first you should loook for a bigger one :-) and second you 62 | # can change this behavior by adding (model specific) padding. 63 | 64 | start = time.time() 65 | with open(cached_features_file, "wb") as handle: 66 | pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) 67 | logger.info( 68 | "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start 69 | ) 70 | 71 | def __len__(self): 72 | return len(self.examples) 73 | 74 | def __getitem__(self, i) -> torch.Tensor: 75 | return torch.tensor(self.examples[i], dtype=torch.long) 76 | 77 | 78 | class LineByLineTextDataset(Dataset): 79 | """ 80 | This will be superseded by a framework-agnostic approach 81 | soon. 82 | """ 83 | 84 | def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, shuffle=False): 85 | assert os.path.isfile(file_path) 86 | # Here, we do not cache the features, operating under the assumption 87 | # that we will soon use fast multithreaded tokenizers from the 88 | # `tokenizers` repo everywhere =) 89 | logger.info("Creating features from dataset file at %s", file_path) 90 | 91 | with open(file_path, encoding="utf-8") as f: 92 | lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())] 93 | #if shuffle: 94 | # import random 95 | # random.shuffle(lines) 96 | self.shuffle = shuffle 97 | 98 | batch_encoding = tokenizer(lines, add_special_tokens=True, truncation=True, max_length=block_size) 99 | self.examples = batch_encoding["input_ids"] 100 | 101 | def __len__(self): 102 | return len(self.examples) 103 | 104 | def __getitem__(self, i) -> torch.Tensor: 105 | if self.shuffle and i % len(self.examples) == 0: 106 | import random 107 | random.shuffle(self.examples) 108 | 109 | return torch.tensor(self.examples[i], dtype=torch.long) 110 | -------------------------------------------------------------------------------- /transformers/convert_bert_pytorch_checkpoint_to_original_tf.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Convert Huggingface Pytorch checkpoint to Tensorflow checkpoint.""" 17 | 18 | import argparse 19 | import os 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | import torch 24 | 25 | from transformers import BertModel 26 | 27 | 28 | def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name: str): 29 | 30 | """ 31 | :param model:BertModel Pytorch model instance to be converted 32 | :param ckpt_dir: Tensorflow model directory 33 | :param model_name: model name 34 | :return: 35 | 36 | Currently supported HF models: 37 | Y BertModel 38 | N BertForMaskedLM 39 | N BertForPreTraining 40 | N BertForMultipleChoice 41 | N BertForNextSentencePrediction 42 | N BertForSequenceClassification 43 | N BertForQuestionAnswering 44 | """ 45 | 46 | tensors_to_transpose = ("dense.weight", "attention.self.query", "attention.self.key", "attention.self.value") 47 | 48 | var_map = ( 49 | ("layer.", "layer_"), 50 | ("word_embeddings.weight", "word_embeddings"), 51 | ("position_embeddings.weight", "position_embeddings"), 52 | ("token_type_embeddings.weight", "token_type_embeddings"), 53 | (".", "/"), 54 | ("LayerNorm/weight", "LayerNorm/gamma"), 55 | ("LayerNorm/bias", "LayerNorm/beta"), 56 | ("weight", "kernel"), 57 | ) 58 | 59 | if not os.path.isdir(ckpt_dir): 60 | os.makedirs(ckpt_dir) 61 | 62 | state_dict = model.state_dict() 63 | 64 | def to_tf_var_name(name: str): 65 | for patt, repl in iter(var_map): 66 | name = name.replace(patt, repl) 67 | return "bert/{}".format(name) 68 | 69 | def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session): 70 | tf_dtype = tf.dtypes.as_dtype(tensor.dtype) 71 | tf_var = tf.get_variable(dtype=tf_dtype, shape=tensor.shape, name=name, initializer=tf.zeros_initializer()) 72 | session.run(tf.variables_initializer([tf_var])) 73 | session.run(tf_var) 74 | return tf_var 75 | 76 | tf.reset_default_graph() 77 | with tf.Session() as session: 78 | for var_name in state_dict: 79 | tf_name = to_tf_var_name(var_name) 80 | torch_tensor = state_dict[var_name].numpy() 81 | if any([x in var_name for x in tensors_to_transpose]): 82 | torch_tensor = torch_tensor.T 83 | tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session) 84 | tf.keras.backend.set_value(tf_var, torch_tensor) 85 | tf_weight = session.run(tf_var) 86 | print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor))) 87 | 88 | saver = tf.train.Saver(tf.trainable_variables()) 89 | saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt")) 90 | 91 | 92 | def main(raw_args=None): 93 | parser = argparse.ArgumentParser() 94 | parser.add_argument("--model_name", type=str, required=True, help="model name e.g. 
bert-base-uncased") 95 | parser.add_argument( 96 | "--cache_dir", type=str, default=None, required=False, help="Directory containing pytorch model" 97 | ) 98 | parser.add_argument("--pytorch_model_path", type=str, required=True, help="/path/to/.bin") 99 | parser.add_argument("--tf_cache_dir", type=str, required=True, help="Directory in which to save tensorflow model") 100 | args = parser.parse_args(raw_args) 101 | 102 | model = BertModel.from_pretrained( 103 | pretrained_model_name_or_path=args.model_name, 104 | state_dict=torch.load(args.pytorch_model_path), 105 | cache_dir=args.cache_dir, 106 | ) 107 | 108 | convert_pytorch_checkpoint_to_tf(model=model, ckpt_dir=args.tf_cache_dir, model_name=args.model_name) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /transformers/configuration_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import copy 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | class EncoderDecoderConfig(PretrainedConfig): 27 | r""" 28 | :class:`~transformers.EncoderDecoderConfig` is the configuration class to store the configuration of a `EncoderDecoderModel`. 29 | 30 | It is used to instantiate an Encoder Decoder model according to the specified arguments, defining the encoder and decoder configs. 31 | Configuration objects inherit from :class:`~transformers.PretrainedConfig` 32 | and can be used to control the model outputs. 33 | See the documentation for :class:`~transformers.PretrainedConfig` for more information. 34 | 35 | Args: 36 | kwargs (`optional`): 37 | Remaining dictionary of keyword arguments. Notably: 38 | encoder (:class:`PretrainedConfig`, optional, defaults to `None`): 39 | An instance of a configuration object that defines the encoder config. 40 | decoder (:class:`PretrainedConfig`, optional, defaults to `None`): 41 | An instance of a configuration object that defines the decoder config. 
42 | 43 | Example:: 44 | 45 | >>> from transformers import BertConfig, EncoderDecoderConfig, EncoderDecoderModel 46 | 47 | >>> # Initializing a BERT bert-base-uncased style configuration 48 | >>> config_encoder = BertConfig() 49 | >>> config_decoder = BertConfig() 50 | 51 | >>> config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder) 52 | 53 | >>> # Initializing a Bert2Bert model from the bert-base-uncased style configurations 54 | >>> model = EncoderDecoderModel(config=config) 55 | 56 | >>> # Accessing the model configuration 57 | >>> config_encoder = model.config.encoder 58 | >>> config_decoder = model.config.decoder 59 | """ 60 | model_type = "encoder_decoder" 61 | 62 | def __init__(self, **kwargs): 63 | super().__init__(**kwargs) 64 | assert ( 65 | "encoder" in kwargs and "decoder" in kwargs 66 | ), "Config has to be initialized with encoder and decoder config" 67 | encoder_config = kwargs.pop("encoder") 68 | encoder_model_type = encoder_config.pop("model_type") 69 | decoder_config = kwargs.pop("decoder") 70 | decoder_model_type = decoder_config.pop("model_type") 71 | 72 | from transformers import AutoConfig 73 | 74 | self.encoder = AutoConfig.for_model(encoder_model_type, **encoder_config) 75 | self.decoder = AutoConfig.for_model(decoder_model_type, **decoder_config) 76 | self.is_encoder_decoder = True 77 | 78 | @classmethod 79 | def from_encoder_decoder_configs( 80 | cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig 81 | ) -> PretrainedConfig: 82 | r""" 83 | Instantiate a :class:`~transformers.EncoderDecoderConfig` (or a derived class) from a pre-trained encoder model configuration and decoder model configuration. 84 | 85 | Returns: 86 | :class:`EncoderDecoderConfig`: An instance of a configuration object 87 | """ 88 | logger.info("Set `config.is_decoder=True` for decoder_config") 89 | decoder_config.is_decoder = True 90 | 91 | return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict()) 92 | 93 | def to_dict(self): 94 | """ 95 | Serializes this instance to a Python dictionary. Override the default `to_dict()` from `PretrainedConfig`. 96 | 97 | Returns: 98 | :obj:`Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, 99 | """ 100 | output = copy.deepcopy(self.__dict__) 101 | output["encoder"] = self.encoder.to_dict() 102 | output["decoder"] = self.decoder.to_dict() 103 | output["model_type"] = self.__class__.model_type 104 | return output 105 | -------------------------------------------------------------------------------- /transformers/configuration_t5.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2010, The T5 Authors and HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ T5 model configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | T5_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json", 27 | "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json", 28 | "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json", 29 | "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-config.json", 30 | "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-config.json", 31 | } 32 | 33 | 34 | class T5Config(PretrainedConfig): 35 | r""" 36 | :class:`~transformers.T5Config` is the configuration class to store the configuration of a 37 | `T5Model`. 38 | 39 | 40 | Arguments: 41 | vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `T5Model`. 42 | d_model: Size of the encoder layers and the pooler layer. `d_model` can also be accessed via the property `hidden_size`. 43 | num_layers: Number of hidden layers in the Transformer encoder. `num_layers` can also be accessed via the property `num_hidden_layers`. 44 | d_kv: Size of the key, query, value projections per attention head. `d_kv` has to be equal to `d_model // num_heads`. 45 | d_ff: Size of the intermediate feed forward layer in each `T5Block`. 46 | num_heads: Number of attention heads for each attention layer in 47 | the Transformer encoder. `num_heads` can also be accessed via the property `num_attention_heads`. 48 | intermediate_size: The size of the "intermediate" (i.e., feed-forward) 49 | layer in the Transformer encoder. 50 | hidden_act: The non-linear activation function (function or string) in the 51 | encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported. 52 | hidden_dropout_prob: The dropout probability for all fully connected 53 | layers in the embeddings, encoder, and pooler. 54 | attention_probs_dropout_prob: The dropout ratio for the attention 55 | probabilities. 56 | n_positions: The maximum sequence length that this model might 57 | ever be used with. Typically set this to something large just in case 58 | (e.g., 512 or 1024 or 2048). `n_positions` can also be accessed via the property `max_position_embeddings`. 59 | type_vocab_size: The vocabulary size of the `token_type_ids` passed into 60 | `T5Model`. 61 | initializer_factor: A factor for initializing all weight matrices (should be kept to 1.0, used for initialization testing). 62 | layer_norm_eps: The epsilon used by LayerNorm.
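
    For illustration, a minimal usage sketch in the same style as the other configuration classes in this
    package (the model built this way is randomly initialized, not pretrained)::

        >>> from transformers import T5Config, T5Model

        >>> # Initializing a configuration with the defaults documented above
        >>> configuration = T5Config()

        >>> # Initializing a model from that configuration
        >>> model = T5Model(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config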
63 | """ 64 | model_type = "t5" 65 | 66 | def __init__( 67 | self, 68 | vocab_size=32128, 69 | n_positions=512, 70 | d_model=512, 71 | d_kv=64, 72 | d_ff=2048, 73 | num_layers=6, 74 | num_heads=8, 75 | relative_attention_num_buckets=32, 76 | dropout_rate=0.1, 77 | layer_norm_epsilon=1e-6, 78 | initializer_factor=1.0, 79 | is_encoder_decoder=True, 80 | pad_token_id=0, 81 | eos_token_id=1, 82 | **kwargs 83 | ): 84 | super().__init__( 85 | pad_token_id=pad_token_id, eos_token_id=eos_token_id, is_encoder_decoder=is_encoder_decoder, **kwargs, 86 | ) 87 | self.vocab_size = vocab_size 88 | self.n_positions = n_positions 89 | self.d_model = d_model 90 | self.d_kv = d_kv 91 | self.d_ff = d_ff 92 | self.num_layers = num_layers 93 | self.num_heads = num_heads 94 | self.relative_attention_num_buckets = relative_attention_num_buckets 95 | self.dropout_rate = dropout_rate 96 | self.layer_norm_epsilon = layer_norm_epsilon 97 | self.initializer_factor = initializer_factor 98 | 99 | @property 100 | def max_position_embeddings(self): 101 | return self.n_positions 102 | 103 | @property 104 | def hidden_size(self): 105 | return self.d_model 106 | 107 | @property 108 | def num_attention_heads(self): 109 | return self.num_heads 110 | 111 | @property 112 | def num_hidden_layers(self): 113 | return self.num_layers 114 | -------------------------------------------------------------------------------- /transformers/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert Transformer XL checkpoint and datasets.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | import os 21 | import pickle 22 | import sys 23 | 24 | import torch 25 | 26 | import transformers.tokenization_transfo_xl as data_utils 27 | from transformers import ( 28 | CONFIG_NAME, 29 | WEIGHTS_NAME, 30 | TransfoXLConfig, 31 | TransfoXLLMHeadModel, 32 | load_tf_weights_in_transfo_xl, 33 | ) 34 | from transformers.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES 35 | 36 | 37 | logging.basicConfig(level=logging.INFO) 38 | 39 | # We do this to be able to load python 2 datasets pickles 40 | # See e.g. 
https://stackoverflow.com/questions/2121874/python-pickling-after-changing-a-modules-directory/2121918#2121918 41 | data_utils.Vocab = data_utils.TransfoXLTokenizer 42 | data_utils.Corpus = data_utils.TransfoXLCorpus 43 | sys.modules["data_utils"] = data_utils 44 | sys.modules["vocabulary"] = data_utils 45 | 46 | 47 | def convert_transfo_xl_checkpoint_to_pytorch( 48 | tf_checkpoint_path, transfo_xl_config_file, pytorch_dump_folder_path, transfo_xl_dataset_file 49 | ): 50 | if transfo_xl_dataset_file: 51 | # Convert a pre-processed corpus (see original TensorFlow repo) 52 | with open(transfo_xl_dataset_file, "rb") as fp: 53 | corpus = pickle.load(fp, encoding="latin1") 54 | # Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term) 55 | pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["pretrained_vocab_file"] 56 | print("Save vocabulary to {}".format(pytorch_vocab_dump_path)) 57 | corpus_vocab_dict = corpus.vocab.__dict__ 58 | torch.save(corpus_vocab_dict, pytorch_vocab_dump_path) 59 | 60 | corpus_dict_no_vocab = corpus.__dict__ 61 | corpus_dict_no_vocab.pop("vocab", None) 62 | pytorch_dataset_dump_path = pytorch_dump_folder_path + "/" + CORPUS_NAME 63 | print("Save dataset to {}".format(pytorch_dataset_dump_path)) 64 | torch.save(corpus_dict_no_vocab, pytorch_dataset_dump_path) 65 | 66 | if tf_checkpoint_path: 67 | # Convert a pre-trained TensorFlow model 68 | config_path = os.path.abspath(transfo_xl_config_file) 69 | tf_path = os.path.abspath(tf_checkpoint_path) 70 | 71 | print("Converting Transformer XL checkpoint from {} with config at {}".format(tf_path, config_path)) 72 | # Initialise PyTorch model 73 | if transfo_xl_config_file == "": 74 | config = TransfoXLConfig() 75 | else: 76 | config = TransfoXLConfig.from_json_file(transfo_xl_config_file) 77 | print("Building PyTorch model from configuration: {}".format(str(config))) 78 | model = TransfoXLLMHeadModel(config) 79 | 80 | model = load_tf_weights_in_transfo_xl(model, config, tf_path) 81 | # Save pytorch-model 82 | pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) 83 | pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME) 84 | print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) 85 | torch.save(model.state_dict(), pytorch_weights_dump_path) 86 | print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) 87 | with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 88 | f.write(config.to_json_string()) 89 | 90 | 91 | if __name__ == "__main__": 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument( 94 | "--pytorch_dump_folder_path", 95 | default=None, 96 | type=str, 97 | required=True, 98 | help="Path to the folder to store the PyTorch model or dataset/vocab.", 99 | ) 100 | parser.add_argument( 101 | "--tf_checkpoint_path", 102 | default="", 103 | type=str, 104 | help="An optional path to a TensorFlow checkpoint path to be converted.", 105 | ) 106 | parser.add_argument( 107 | "--transfo_xl_config_file", 108 | default="", 109 | type=str, 110 | help="An optional config json file corresponding to the pre-trained BERT model. 
\n" 111 | "This specifies the model architecture.", 112 | ) 113 | parser.add_argument( 114 | "--transfo_xl_dataset_file", 115 | default="", 116 | type=str, 117 | help="An optional dataset file to be converted in a vocabulary.", 118 | ) 119 | args = parser.parse_args() 120 | convert_transfo_xl_checkpoint_to_pytorch( 121 | args.tf_checkpoint_path, 122 | args.transfo_xl_config_file, 123 | args.pytorch_dump_folder_path, 124 | args.transfo_xl_dataset_file, 125 | ) 126 | -------------------------------------------------------------------------------- /transformers/modeling_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 Inria, Facebook AI Research and the HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """PyTorch CamemBERT model. """ 17 | 18 | import logging 19 | 20 | from .configuration_camembert import CamembertConfig 21 | from .file_utils import add_start_docstrings 22 | from .modeling_roberta import ( 23 | RobertaForMaskedLM, 24 | RobertaForMultipleChoice, 25 | RobertaForQuestionAnswering, 26 | RobertaForSequenceClassification, 27 | RobertaForTokenClassification, 28 | RobertaModel, 29 | ) 30 | 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | _TOKENIZER_FOR_DOC = "CamembertTokenizer" 35 | 36 | CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ 37 | "camembert-base", 38 | "Musixmatch/umberto-commoncrawl-cased-v1", 39 | "Musixmatch/umberto-wikipedia-uncased-v1", 40 | # See all CamemBERT models at https://huggingface.co/models?filter=camembert 41 | ] 42 | 43 | CAMEMBERT_START_DOCSTRING = r""" 44 | 45 | This model is a PyTorch `torch.nn.Module `_ sub-class. 46 | Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general 47 | usage and behavior. 48 | 49 | Parameters: 50 | config (:class:`~transformers.CamembertConfig`): Model configuration class with all the parameters of the 51 | model. Initializing with a config file does not load the weights associated with the model, only the 52 | configuration. 53 | Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. 54 | output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): 55 | If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. 56 | """ 57 | 58 | 59 | @add_start_docstrings( 60 | "The bare CamemBERT Model transformer outputting raw hidden-states without any specific head on top.", 61 | CAMEMBERT_START_DOCSTRING, 62 | ) 63 | class CamembertModel(RobertaModel): 64 | """ 65 | This class overrides :class:`~transformers.RobertaModel`. Please check the 66 | superclass for the appropriate documentation alongside usage examples. 
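
    A brief loading sketch, for illustration (``"camembert-base"`` is one of the checkpoints listed in
    ``CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST`` above)::

        >>> from transformers import CamembertModel, CamembertTokenizer

        >>> tokenizer = CamembertTokenizer.from_pretrained("camembert-base")
        >>> model = CamembertModel.from_pretrained("camembert-base")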
67 | """ 68 | 69 | config_class = CamembertConfig 70 | 71 | 72 | @add_start_docstrings( 73 | """CamemBERT Model with a `language modeling` head on top. """, CAMEMBERT_START_DOCSTRING, 74 | ) 75 | class CamembertForMaskedLM(RobertaForMaskedLM): 76 | """ 77 | This class overrides :class:`~transformers.RobertaForMaskedLM`. Please check the 78 | superclass for the appropriate documentation alongside usage examples. 79 | """ 80 | 81 | config_class = CamembertConfig 82 | 83 | 84 | @add_start_docstrings( 85 | """CamemBERT Model transformer with a sequence classification/regression head on top (a linear layer 86 | on top of the pooled output) e.g. for GLUE tasks. """, 87 | CAMEMBERT_START_DOCSTRING, 88 | ) 89 | class CamembertForSequenceClassification(RobertaForSequenceClassification): 90 | """ 91 | This class overrides :class:`~transformers.RobertaForSequenceClassification`. Please check the 92 | superclass for the appropriate documentation alongside usage examples. 93 | """ 94 | 95 | config_class = CamembertConfig 96 | 97 | 98 | @add_start_docstrings( 99 | """CamemBERT Model with a multiple choice classification head on top (a linear layer on top of 100 | the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """, 101 | CAMEMBERT_START_DOCSTRING, 102 | ) 103 | class CamembertForMultipleChoice(RobertaForMultipleChoice): 104 | """ 105 | This class overrides :class:`~transformers.RobertaForMultipleChoice`. Please check the 106 | superclass for the appropriate documentation alongside usage examples. 107 | """ 108 | 109 | config_class = CamembertConfig 110 | 111 | 112 | @add_start_docstrings( 113 | """CamemBERT Model with a token classification head on top (a linear layer on top of 114 | the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, 115 | CAMEMBERT_START_DOCSTRING, 116 | ) 117 | class CamembertForTokenClassification(RobertaForTokenClassification): 118 | """ 119 | This class overrides :class:`~transformers.RobertaForTokenClassification`. Please check the 120 | superclass for the appropriate documentation alongside usage examples. 121 | """ 122 | 123 | config_class = CamembertConfig 124 | 125 | 126 | @add_start_docstrings( 127 | """CamemBERT Model with a span classification head on top for extractive question-answering tasks like SQuAD 128 | (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits` """, 129 | CAMEMBERT_START_DOCSTRING, 130 | ) 131 | class CamembertForQuestionAnswering(RobertaForQuestionAnswering): 132 | """ 133 | This class overrides :class:`~transformers.RobertaForQuestionAnswering`. Please check the 134 | superclass for the appropriate documentation alongside usage examples. 135 | """ 136 | 137 | config_class = CamembertConfig 138 | -------------------------------------------------------------------------------- /transformers/configuration_retribert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ RetriBERT model configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | # TODO: upload to AWS 26 | RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 27 | "retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", 28 | } 29 | 30 | 31 | class RetriBertConfig(PretrainedConfig): 32 | r""" 33 | This is the configuration class to store the configuration of a :class:`~transformers.RetriBertModel`. 34 | It is used to instantiate a RetriBertModel according to the specified arguments, defining the model 35 | architecture. 36 | 37 | Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used 38 | to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig` 39 | for more information. 40 | 41 | 42 | Args: 43 | vocab_size (:obj:`int`, optional, defaults to 30522): 44 | Vocabulary size of the BERT model. Defines the different tokens that 45 | can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.BertModel`. 46 | hidden_size (:obj:`int`, optional, defaults to 768): 47 | Dimensionality of the encoder layers and the pooler layer. 48 | num_hidden_layers (:obj:`int`, optional, defaults to 8): 49 | Number of hidden layers in the Transformer encoder. 50 | num_attention_heads (:obj:`int`, optional, defaults to 12): 51 | Number of attention heads for each attention layer in the Transformer encoder. 52 | intermediate_size (:obj:`int`, optional, defaults to 3072): 53 | Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. 54 | hidden_act (:obj:`str` or :obj:`function`, optional, defaults to "gelu"): 55 | The non-linear activation function (function or string) in the encoder and pooler. 56 | If string, "gelu", "relu", "swish" and "gelu_new" are supported. 57 | hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1): 58 | The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. 59 | attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1): 60 | The dropout ratio for the attention probabilities. 61 | max_position_embeddings (:obj:`int`, optional, defaults to 512): 62 | The maximum sequence length that this model might ever be used with. 63 | Typically set this to something large just in case (e.g., 512 or 1024 or 2048). 64 | type_vocab_size (:obj:`int`, optional, defaults to 2): 65 | The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`. 66 | initializer_range (:obj:`float`, optional, defaults to 0.02): 67 | The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 68 | layer_norm_eps (:obj:`float`, optional, defaults to 1e-12): 69 | The epsilon used by the layer normalization layers.
70 | share_encoders (:obj:`bool`, optional, defaults to True): 71 | Whether to use the same Bert-type encoder for the queries and document 72 | projection_dim (:obj:`int`, optional, defaults to 128): 73 | Final dimension of the query and document representation after projection 74 | 75 | """ 76 | model_type = "retribert" 77 | 78 | def __init__( 79 | self, 80 | vocab_size=30522, 81 | hidden_size=768, 82 | num_hidden_layers=8, 83 | num_attention_heads=12, 84 | intermediate_size=3072, 85 | hidden_act="gelu", 86 | hidden_dropout_prob=0.1, 87 | attention_probs_dropout_prob=0.1, 88 | max_position_embeddings=512, 89 | type_vocab_size=2, 90 | initializer_range=0.02, 91 | layer_norm_eps=1e-12, 92 | share_encoders=True, 93 | projection_dim=128, 94 | pad_token_id=0, 95 | **kwargs 96 | ): 97 | super().__init__(pad_token_id=pad_token_id, **kwargs) 98 | 99 | self.vocab_size = vocab_size 100 | self.hidden_size = hidden_size 101 | self.num_hidden_layers = num_hidden_layers 102 | self.num_attention_heads = num_attention_heads 103 | self.hidden_act = hidden_act 104 | self.intermediate_size = intermediate_size 105 | self.hidden_dropout_prob = hidden_dropout_prob 106 | self.attention_probs_dropout_prob = attention_probs_dropout_prob 107 | self.max_position_embeddings = max_position_embeddings 108 | self.type_vocab_size = type_vocab_size 109 | self.initializer_range = initializer_range 110 | self.layer_norm_eps = layer_norm_eps 111 | self.share_encoders = share_encoders 112 | self.projection_dim = projection_dim 113 | -------------------------------------------------------------------------------- /transformers/modeling_xlm_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 Facebook AI Research and the HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """PyTorch XLM-RoBERTa model. """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_xlm_roberta import XLMRobertaConfig 22 | from .file_utils import add_start_docstrings 23 | from .modeling_roberta import ( 24 | RobertaForMaskedLM, 25 | RobertaForMultipleChoice, 26 | RobertaForQuestionAnswering, 27 | RobertaForSequenceClassification, 28 | RobertaForTokenClassification, 29 | RobertaModel, 30 | ) 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ 36 | "xlm-roberta-base", 37 | "xlm-roberta-large", 38 | "xlm-roberta-large-finetuned-conll02-dutch", 39 | "xlm-roberta-large-finetuned-conll02-spanish", 40 | "xlm-roberta-large-finetuned-conll03-english", 41 | "xlm-roberta-large-finetuned-conll03-german", 42 | # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta 43 | ] 44 | 45 | 46 | XLM_ROBERTA_START_DOCSTRING = r""" 47 | 48 | This model is a PyTorch `torch.nn.Module `_ sub-class. 
49 | Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general 50 | usage and behavior. 51 | 52 | Parameters: 53 | config (:class:`~transformers.XLMRobertaConfig`): Model configuration class with all the parameters of the 54 | model. Initializing with a config file does not load the weights associated with the model, only the configuration. 55 | Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. 56 | output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): 57 | If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. 58 | """ 59 | 60 | 61 | @add_start_docstrings( 62 | "The bare XLM-RoBERTa Model transformer outputting raw hidden-states without any specific head on top.", 63 | XLM_ROBERTA_START_DOCSTRING, 64 | ) 65 | class XLMRobertaModel(RobertaModel): 66 | """ 67 | This class overrides :class:`~transformers.RobertaModel`. Please check the 68 | superclass for the appropriate documentation alongside usage examples. 69 | """ 70 | 71 | config_class = XLMRobertaConfig 72 | 73 | 74 | @add_start_docstrings( 75 | """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING, 76 | ) 77 | class XLMRobertaForMaskedLM(RobertaForMaskedLM): 78 | """ 79 | This class overrides :class:`~transformers.RobertaForMaskedLM`. Please check the 80 | superclass for the appropriate documentation alongside usage examples. 81 | """ 82 | 83 | config_class = XLMRobertaConfig 84 | 85 | 86 | @add_start_docstrings( 87 | """XLM-RoBERTa Model transformer with a sequence classification/regression head on top (a linear layer 88 | on top of the pooled output) e.g. for GLUE tasks. """, 89 | XLM_ROBERTA_START_DOCSTRING, 90 | ) 91 | class XLMRobertaForSequenceClassification(RobertaForSequenceClassification): 92 | """ 93 | This class overrides :class:`~transformers.RobertaForSequenceClassification`. Please check the 94 | superclass for the appropriate documentation alongside usage examples. 95 | """ 96 | 97 | config_class = XLMRobertaConfig 98 | 99 | 100 | @add_start_docstrings( 101 | """XLM-RoBERTa Model with a multiple choice classification head on top (a linear layer on top of 102 | the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """, 103 | XLM_ROBERTA_START_DOCSTRING, 104 | ) 105 | class XLMRobertaForMultipleChoice(RobertaForMultipleChoice): 106 | """ 107 | This class overrides :class:`~transformers.RobertaForMultipleChoice`. Please check the 108 | superclass for the appropriate documentation alongside usage examples. 109 | """ 110 | 111 | config_class = XLMRobertaConfig 112 | 113 | 114 | @add_start_docstrings( 115 | """XLM-RoBERTa Model with a token classification head on top (a linear layer on top of 116 | the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, 117 | XLM_ROBERTA_START_DOCSTRING, 118 | ) 119 | class XLMRobertaForTokenClassification(RobertaForTokenClassification): 120 | """ 121 | This class overrides :class:`~transformers.RobertaForTokenClassification`. Please check the 122 | superclass for the appropriate documentation alongside usage examples. 
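
    A short usage sketch, for illustration (loading ``"xlm-roberta-base"`` this way leaves the
    token-classification head randomly initialized, so it still needs fine-tuning)::

        >>> from transformers import XLMRobertaTokenizer, XLMRobertaForTokenClassification

        >>> tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        >>> model = XLMRobertaForTokenClassification.from_pretrained("xlm-roberta-base")
        >>> inputs = tokenizer("Hello world", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> logits = outputs[0]  # per-token classification scores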
123 | """ 124 | 125 | config_class = XLMRobertaConfig 126 | 127 | 128 | @add_start_docstrings( 129 | """XLM-RoBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD (a 130 | linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).""", 131 | XLM_ROBERTA_START_DOCSTRING, 132 | ) 133 | class XLMRobertaForQuestionAnswering(RobertaForQuestionAnswering): 134 | """ 135 | This class overrides :class:`~transformers.RobertaForQuestionAnswering`. Please check the 136 | superclass for the appropriate documentation alongside usage examples. 137 | """ 138 | 139 | config_class = XLMRobertaConfig 140 | -------------------------------------------------------------------------------- /transformers/data/datasets/glue.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | from dataclasses import dataclass, field 5 | from enum import Enum 6 | from typing import List, Optional, Union 7 | 8 | import torch 9 | from filelock import FileLock 10 | from torch.utils.data.dataset import Dataset 11 | 12 | from ...tokenization_bart import BartTokenizer, BartTokenizerFast 13 | from ...tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast 14 | from ...tokenization_utils import PreTrainedTokenizer 15 | from ...tokenization_xlm_roberta import XLMRobertaTokenizer 16 | from ..processors.glue import glue_convert_examples_to_features, glue_output_modes, glue_processors 17 | from ..processors.utils import InputFeatures 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | @dataclass 24 | class GlueDataTrainingArguments: 25 | """ 26 | Arguments pertaining to what data we are going to input our model for training and eval. 27 | 28 | Using `HfArgumentParser` we can turn this class 29 | into argparse arguments to be able to specify them on 30 | the command line. 31 | """ 32 | 33 | task_name: str = field(metadata={"help": "The name of the task to train on: " + ", ".join(glue_processors.keys())}) 34 | data_dir: str = field( 35 | metadata={"help": "The input data dir. Should contain the .tsv files (or other data files) for the task."} 36 | ) 37 | max_seq_length: int = field( 38 | default=128, 39 | metadata={ 40 | "help": "The maximum total input sequence length after tokenization. Sequences longer " 41 | "than this will be truncated, sequences shorter will be padded." 42 | }, 43 | ) 44 | overwrite_cache: bool = field( 45 | default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} 46 | ) 47 | 48 | def __post_init__(self): 49 | self.task_name = self.task_name.lower() 50 | 51 | 52 | class Split(Enum): 53 | train = "train" 54 | dev = "dev" 55 | test = "test" 56 | 57 | 58 | class GlueDataset(Dataset): 59 | """ 60 | This will be superseded by a framework-agnostic approach 61 | soon. 
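
    A minimal construction sketch, for illustration (the ``data_dir`` path below is a placeholder and must
    point at the GLUE ``.tsv`` files for the chosen task)::

        >>> from transformers import BertTokenizer, GlueDataset, GlueDataTrainingArguments

        >>> args = GlueDataTrainingArguments(task_name="mrpc", data_dir="/path/to/glue/MRPC")
        >>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        >>> train_dataset = GlueDataset(args, tokenizer=tokenizer)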
62 | """ 63 | 64 | args: GlueDataTrainingArguments 65 | output_mode: str 66 | features: List[InputFeatures] 67 | 68 | def __init__( 69 | self, 70 | args: GlueDataTrainingArguments, 71 | tokenizer: PreTrainedTokenizer, 72 | limit_length: Optional[int] = None, 73 | mode: Union[str, Split] = Split.train, 74 | cache_dir: Optional[str] = None, 75 | ): 76 | self.args = args 77 | self.processor = glue_processors[args.task_name]() 78 | self.output_mode = glue_output_modes[args.task_name] 79 | if isinstance(mode, str): 80 | try: 81 | mode = Split[mode] 82 | except KeyError: 83 | raise KeyError("mode is not a valid split name") 84 | # Load data features from cache or dataset file 85 | cached_features_file = os.path.join( 86 | cache_dir if cache_dir is not None else args.data_dir, 87 | "cached_{}_{}_{}_{}".format( 88 | mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name, 89 | ), 90 | ) 91 | label_list = self.processor.get_labels() 92 | if args.task_name in ["mnli", "mnli-mm"] and tokenizer.__class__ in ( 93 | RobertaTokenizer, 94 | RobertaTokenizerFast, 95 | XLMRobertaTokenizer, 96 | BartTokenizer, 97 | BartTokenizerFast, 98 | ): 99 | # HACK(label indices are swapped in RoBERTa pretrained model) 100 | label_list[1], label_list[2] = label_list[2], label_list[1] 101 | self.label_list = label_list 102 | 103 | # Make sure only the first process in distributed training processes the dataset, 104 | # and the others will use the cache. 105 | lock_path = cached_features_file + ".lock" 106 | with FileLock(lock_path): 107 | 108 | if os.path.exists(cached_features_file) and not args.overwrite_cache: 109 | start = time.time() 110 | self.features = torch.load(cached_features_file) 111 | logger.info( 112 | f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start 113 | ) 114 | else: 115 | logger.info(f"Creating features from dataset file at {args.data_dir}") 116 | 117 | if mode == Split.dev: 118 | examples = self.processor.get_dev_examples(args.data_dir) 119 | elif mode == Split.test: 120 | examples = self.processor.get_test_examples(args.data_dir) 121 | else: 122 | examples = self.processor.get_train_examples(args.data_dir) 123 | if limit_length is not None: 124 | examples = examples[:limit_length] 125 | self.features = glue_convert_examples_to_features( 126 | examples, 127 | tokenizer, 128 | max_length=args.max_seq_length, 129 | label_list=label_list, 130 | output_mode=self.output_mode, 131 | ) 132 | start = time.time() 133 | torch.save(self.features, cached_features_file) 134 | # ^ This seems to take a lot of time so I want to investigate why and how we can improve. 135 | logger.info( 136 | "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start 137 | ) 138 | 139 | def __len__(self): 140 | return len(self.features) 141 | 142 | def __getitem__(self, i) -> InputFeatures: 143 | return self.features[i] 144 | 145 | def get_labels(self): 146 | return self.label_list 147 | -------------------------------------------------------------------------------- /transformers/benchmark/benchmark_args_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import dataclasses 18 | import json 19 | import logging 20 | from dataclasses import dataclass, field 21 | from time import time 22 | from typing import List 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | def list_field(default=None, metadata=None): 29 | return field(default_factory=lambda: default, metadata=metadata) 30 | 31 | 32 | @dataclass 33 | class BenchmarkArguments: 34 | """ 35 | BenchmarkArguments are arguments we use in our benchmark scripts 36 | **which relate to the training loop itself**. 37 | 38 | Using `HfArgumentParser` we can turn this class 39 | into argparse arguments to be able to specify them on 40 | the command line. 41 | """ 42 | 43 | models: List[str] = list_field( 44 | default=[], 45 | metadata={ 46 | "help": "Model checkpoints to be provided to the AutoModel classes. Leave blank to benchmark the base version of all available models" 47 | }, 48 | ) 49 | 50 | batch_sizes: List[int] = list_field( 51 | default=[8], metadata={"help": "List of batch sizes for which memory and time performance will be evaluated"} 52 | ) 53 | 54 | sequence_lengths: List[int] = list_field( 55 | default=[8, 32, 128, 512], 56 | metadata={"help": "List of sequence lengths for which memory and time performance will be evaluated"}, 57 | ) 58 | 59 | no_inference: bool = field(default=False, metadata={"help": "Don't benchmark inference of model"}) 60 | no_cuda: bool = field(default=False, metadata={"help": "Whether to run on available cuda devices"}) 61 | no_tpu: bool = field(default=False, metadata={"help": "Whether to run on available tpu devices"}) 62 | fp16: bool = field(default=False, metadata={"help": "Use FP16 to accelerate inference."}) 63 | training: bool = field(default=False, metadata={"help": "Benchmark training of model"}) 64 | verbose: bool = field(default=False, metadata={"help": "Verbose memory tracing"}) 65 | no_speed: bool = field(default=False, metadata={"help": "Don't perform speed measurements"}) 66 | no_memory: bool = field(default=False, metadata={"help": "Don't perform memory measurements"}) 67 | trace_memory_line_by_line: bool = field(default=False, metadata={"help": "Trace memory line by line"}) 68 | save_to_csv: bool = field(default=False, metadata={"help": "Save result to a CSV file"}) 69 | log_print: bool = field(default=False, metadata={"help": "Save all print statements in a log file"}) 70 | no_env_print: bool = field(default=False, metadata={"help": "Don't print environment information"}) 71 | no_multi_process: bool = field( 72 | default=False, 73 | metadata={ 74 | "help": "Don't use multiprocessing for memory and speed measurement. It is highly recommended to use multiprocessing for accurate CPU and GPU memory measurements. This option should only be used for debugging / testing and on TPU."
75 | }, 76 | ) 77 | inference_time_csv_file: str = field( 78 | default=f"inference_time_{round(time())}.csv", 79 | metadata={"help": "CSV filename used if saving time results to csv."}, 80 | ) 81 | inference_memory_csv_file: str = field( 82 | default=f"inference_memory_{round(time())}.csv", 83 | metadata={"help": "CSV filename used if saving memory results to csv."}, 84 | ) 85 | train_time_csv_file: str = field( 86 | default=f"train_time_{round(time())}.csv", 87 | metadata={"help": "CSV filename used if saving time results to csv for training."}, 88 | ) 89 | train_memory_csv_file: str = field( 90 | default=f"train_memory_{round(time())}.csv", 91 | metadata={"help": "CSV filename used if saving memory results to csv for training."}, 92 | ) 93 | env_info_csv_file: str = field( 94 | default=f"env_info_{round(time())}.csv", 95 | metadata={"help": "CSV filename used if saving environment information."}, 96 | ) 97 | log_filename: str = field( 98 | default=f"log_{round(time())}.csv", 99 | metadata={"help": "Log filename used if print statements are saved in log."}, 100 | ) 101 | repeat: int = field(default=3, metadata={"help": "Times an experiment will be run."}) 102 | only_pretrain_model: bool = field( 103 | default=False, 104 | metadata={ 105 | "help": "Instead of loading the model as defined in `config.architectures` if exists, just load the pretrain model weights." 106 | }, 107 | ) 108 | 109 | def to_json_string(self): 110 | """ 111 | Serializes this instance to a JSON string. 112 | """ 113 | return json.dumps(dataclasses.asdict(self), indent=2) 114 | 115 | @property 116 | def model_names(self): 117 | assert ( 118 | len(self.models) > 0 119 | ), "Please make sure you provide at least one model name / model identifier, *e.g.* `--models bert-base-cased` or `args.models = ['bert-base-cased']." 120 | return self.models 121 | 122 | @property 123 | def do_multi_processing(self): 124 | if self.no_multi_process: 125 | return False 126 | elif self.is_tpu: 127 | logger.info("Multiprocessing is currently not possible on TPU.") 128 | return False 129 | else: 130 | return True 131 | -------------------------------------------------------------------------------- /transformers/configuration_ctrl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 Salesforce and HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Salesforce CTRL configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://s3.amazonaws.com/models.huggingface.co/bert/ctrl-config.json"} 26 | 27 | 28 | class CTRLConfig(PretrainedConfig): 29 | """ 30 | This is the configuration class to store the configuration of a :class:`~transformers.CTRLModel`. 
31 | It is used to instantiate an CTRL model according to the specified arguments, defining the model 32 | architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of 33 | the `ctrl `__ architecture from SalesForce. 34 | 35 | Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used 36 | to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig` 37 | for more information. 38 | 39 | Args: 40 | vocab_size (:obj:`int`, optional, defaults to 246534): 41 | Vocabulary size of the CTRL model. Defines the different tokens that 42 | can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.CTRLModel`. 43 | n_positions (:obj:`int`, optional, defaults to 256): 44 | The maximum sequence length that this model might ever be used with. 45 | Typically set this to something large just in case (e.g., 512 or 1024 or 2048). 46 | n_ctx (:obj:`int`, optional, defaults to 256): 47 | Dimensionality of the causal mask (usually same as n_positions). 48 | n_embd (:obj:`int`, optional, defaults to 1280): 49 | Dimensionality of the embeddings and hidden states. 50 | dff (:obj:`int`, optional, defaults to 8192): 51 | Dimensionality of the inner dimension of the FFN. 52 | n_layer (:obj:`int`, optional, defaults to 48): 53 | Number of hidden layers in the Transformer encoder. 54 | n_head (:obj:`int`, optional, defaults to 16): 55 | Number of attention heads for each attention layer in the Transformer encoder. 56 | resid_pdrop (:obj:`float`, optional, defaults to 0.1): 57 | The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. 58 | embd_pdrop (:obj:`int`, optional, defaults to 0.1): 59 | The dropout ratio for the embeddings. 60 | attn_pdrop (:obj:`float`, optional, defaults to 0.1): 61 | The dropout ratio for the attention. 62 | layer_norm_epsilon (:obj:`float`, optional, defaults to 1e-6): 63 | The epsilon to use in the layer normalization layers 64 | initializer_range (:obj:`float`, optional, defaults to 0.02): 65 | The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 
66 | 67 | Example:: 68 | 69 | >>> from transformers import CTRLModel, CTRLConfig 70 | 71 | >>> # Initializing a CTRL configuration 72 | >>> configuration = CTRLConfig() 73 | 74 | >>> # Initializing a model from the configuration 75 | >>> model = CTRLModel(configuration) 76 | 77 | >>> # Accessing the model configuration 78 | >>> configuration = model.config 79 | """ 80 | 81 | model_type = "ctrl" 82 | 83 | def __init__( 84 | self, 85 | vocab_size=246534, 86 | n_positions=256, 87 | n_ctx=256, 88 | n_embd=1280, 89 | dff=8192, 90 | n_layer=48, 91 | n_head=16, 92 | resid_pdrop=0.1, 93 | embd_pdrop=0.1, 94 | attn_pdrop=0.1, 95 | layer_norm_epsilon=1e-6, 96 | initializer_range=0.02, 97 | summary_type="cls_index", 98 | summary_use_proj=True, 99 | summary_activation=None, 100 | summary_proj_to_labels=True, 101 | summary_first_dropout=0.1, 102 | **kwargs 103 | ): 104 | super().__init__(**kwargs) 105 | self.vocab_size = vocab_size 106 | self.n_ctx = n_ctx 107 | self.n_positions = n_positions 108 | self.n_embd = n_embd 109 | self.n_layer = n_layer 110 | self.n_head = n_head 111 | self.dff = dff 112 | self.resid_pdrop = resid_pdrop 113 | self.embd_pdrop = embd_pdrop 114 | self.attn_pdrop = attn_pdrop 115 | self.layer_norm_epsilon = layer_norm_epsilon 116 | self.initializer_range = initializer_range 117 | 118 | self.summary_type = summary_type 119 | self.summary_use_proj = summary_use_proj 120 | self.summary_activation = summary_activation 121 | self.summary_first_dropout = summary_first_dropout 122 | self.summary_proj_to_labels = summary_proj_to_labels 123 | 124 | @property 125 | def max_position_embeddings(self): 126 | return self.n_positions 127 | 128 | @property 129 | def hidden_size(self): 130 | return self.n_embd 131 | 132 | @property 133 | def num_attention_heads(self): 134 | return self.n_head 135 | 136 | @property 137 | def num_hidden_layers(self): 138 | return self.n_layer 139 | -------------------------------------------------------------------------------- /transformers/tokenization_flaubert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present CNRS, Facebook Inc. and the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for Flaubert, based on XLM.""" 16 | 17 | 18 | import logging 19 | import unicodedata 20 | 21 | import six 22 | 23 | from .tokenization_xlm import XLMTokenizer 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | VOCAB_FILES_NAMES = { 29 | "vocab_file": "vocab.json", 30 | "merges_file": "merges.txt", 31 | } 32 | 33 | PRETRAINED_VOCAB_FILES_MAP = { 34 | "vocab_file": { 35 | "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/vocab.json", 36 | "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/vocab.json", 37 | "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/vocab.json", 38 | "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/vocab.json", 39 | }, 40 | "merges_file": { 41 | "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/merges.txt", 42 | "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/merges.txt", 43 | "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/merges.txt", 44 | "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/merges.txt", 45 | }, 46 | } 47 | 48 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 49 | "flaubert/flaubert_small_cased": 512, 50 | "flaubert/flaubert_base_uncased": 512, 51 | "flaubert/flaubert_base_cased": 512, 52 | "flaubert/flaubert_large_cased": 512, 53 | } 54 | 55 | PRETRAINED_INIT_CONFIGURATION = { 56 | "flaubert/flaubert_small_cased": {"do_lowercase": False}, 57 | "flaubert/flaubert_base_uncased": {"do_lowercase": True}, 58 | "flaubert/flaubert_base_cased": {"do_lowercase": False}, 59 | "flaubert/flaubert_large_cased": {"do_lowercase": False}, 60 | } 61 | 62 | 63 | def convert_to_unicode(text): 64 | """ 65 | Converts `text` to Unicode (if it's not already), assuming UTF-8 input. 66 | """ 67 | # six_ensure_text is copied from https://github.com/benjaminp/six 68 | def six_ensure_text(s, encoding="utf-8", errors="strict"): 69 | if isinstance(s, six.binary_type): 70 | return s.decode(encoding, errors) 71 | elif isinstance(s, six.text_type): 72 | return s 73 | else: 74 | raise TypeError("not expecting type '%s'" % type(s)) 75 | 76 | return six_ensure_text(text, encoding="utf-8", errors="ignore") 77 | 78 | 79 | class FlaubertTokenizer(XLMTokenizer): 80 | """ 81 | BPE tokenizer for Flaubert 82 | 83 | - Moses preprocessing & tokenization 84 | - Normalize all inputs text 85 | - argument ``special_tokens`` and function ``set_special_tokens``, can be used to add additional symbols \ 86 | (ex: "__classify__") to a vocabulary 87 | - `do_lowercase` controle lower casing (automatically set for pretrained vocabularies) 88 | 89 | This tokenizer inherits from :class:`~transformers.XLMTokenizer`. Please check the superclass for usage examples 90 | and documentation regarding arguments. 
91 | """ 92 | 93 | vocab_files_names = VOCAB_FILES_NAMES 94 | pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP 95 | pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION 96 | max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES 97 | 98 | def __init__(self, do_lowercase=False, **kwargs): 99 | super().__init__(**kwargs) 100 | self.do_lowercase = do_lowercase 101 | self.do_lowercase_and_remove_accent = False 102 | 103 | def preprocess_text(self, text): 104 | text = text.replace("``", '"').replace("''", '"') 105 | text = convert_to_unicode(text) 106 | text = unicodedata.normalize("NFC", text) 107 | 108 | if self.do_lowercase: 109 | text = text.lower() 110 | 111 | return text 112 | 113 | def _tokenize(self, text, bypass_tokenizer=False): 114 | """ 115 | Tokenize a string given language code using Moses. 116 | 117 | Details of tokenization: 118 | - [sacremoses](https://github.com/alvations/sacremoses): port of Moses 119 | - Install with `pip install sacremoses` 120 | 121 | Args: 122 | - bypass_tokenizer: Allow users to preprocess and tokenize the sentences externally (default = False) (bool). If True, we only apply BPE. 123 | 124 | Returns: 125 | List of tokens. 126 | """ 127 | lang = "fr" 128 | if lang and self.lang2id and lang not in self.lang2id: 129 | logger.error( 130 | "Supplied language code not found in lang2id mapping. Please check that your language is supported by the loaded pretrained model." 131 | ) 132 | 133 | if bypass_tokenizer: 134 | text = text.split() 135 | else: 136 | text = self.preprocess_text(text) 137 | text = self.moses_pipeline(text, lang=lang) 138 | text = self.moses_tokenize(text, lang=lang) 139 | 140 | split_tokens = [] 141 | for token in text: 142 | if token: 143 | split_tokens.extend([t for t in self.bpe(token).split(" ")]) 144 | 145 | return split_tokens 146 | -------------------------------------------------------------------------------- /transformers/configuration_bart.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Fairseq Authors and The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """ BART configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | BART_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "facebook/bart-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-base/config.json", 27 | "facebook/bart-large": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large/config.json", 28 | "facebook/bart-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-mnli/config.json", 29 | "facebook/bart-large-cnn": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-cnn/config.json", 30 | "facebook/bart-large-xsum": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-xsum/config.json", 31 | "facebook/mbart-large-en-ro": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/config.json", 32 | "yjernite/bart_eli5": "https://s3.amazonaws.com/models.huggingface.co/bert/yjernite/bart_eli5/config.json", 33 | } 34 | 35 | 36 | class BartConfig(PretrainedConfig): 37 | r""" 38 | Configuration class for Bart. Parameters are renamed from the fairseq implementation 39 | """ 40 | model_type = "bart" 41 | 42 | def __init__( 43 | self, 44 | activation_dropout=0.0, 45 | extra_pos_embeddings=2, 46 | activation_function="gelu", 47 | vocab_size=50265, 48 | d_model=1024, 49 | encoder_ffn_dim=4096, 50 | encoder_layers=12, 51 | encoder_attention_heads=16, 52 | decoder_ffn_dim=4096, 53 | decoder_layers=12, 54 | decoder_attention_heads=16, 55 | encoder_layerdrop=0.0, 56 | decoder_layerdrop=0.0, 57 | attention_dropout=0.0, 58 | dropout=0.1, 59 | max_position_embeddings=1024, 60 | init_std=0.02, 61 | classifier_dropout=0.0, 62 | num_labels=3, 63 | is_encoder_decoder=True, 64 | pad_token_id=1, 65 | bos_token_id=0, 66 | eos_token_id=2, 67 | normalize_before=False, 68 | add_final_layer_norm=False, 69 | scale_embedding=False, 70 | normalize_embedding=True, 71 | static_position_embeddings=False, 72 | add_bias_logits=False, 73 | **common_kwargs 74 | ): 75 | r""" 76 | :class:`~transformers.BartConfig` is the configuration class for `BartModel`. 
77 | 78 | Examples:: 79 | 80 | >>> from transformers import BartConfig, BartModel 81 | 82 | >>> config = BartConfig.from_pretrained('facebook/bart-large') 83 | >>> model = BartModel(config) 84 | """ 85 | if "hidden_size" in common_kwargs: 86 | raise ValueError("hidden size is called d_model") 87 | super().__init__( 88 | num_labels=num_labels, 89 | pad_token_id=pad_token_id, 90 | bos_token_id=bos_token_id, 91 | eos_token_id=eos_token_id, 92 | is_encoder_decoder=is_encoder_decoder, 93 | **common_kwargs, 94 | ) 95 | self.vocab_size = vocab_size 96 | self.d_model = d_model # encoder_embed_dim and decoder_embed_dim 97 | self.encoder_ffn_dim = encoder_ffn_dim 98 | self.encoder_layers = self.num_hidden_layers = encoder_layers 99 | self.encoder_attention_heads = encoder_attention_heads 100 | self.encoder_layerdrop = encoder_layerdrop 101 | self.decoder_layerdrop = decoder_layerdrop 102 | self.decoder_ffn_dim = decoder_ffn_dim 103 | self.decoder_layers = decoder_layers 104 | self.decoder_attention_heads = decoder_attention_heads 105 | self.max_position_embeddings = max_position_embeddings 106 | self.init_std = init_std # Normal(0, this parameter) 107 | self.activation_function = activation_function 108 | 109 | # Params introduced for Mbart 110 | self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True 111 | self.normalize_embedding = normalize_embedding # True for mbart, False otherwise 112 | self.normalize_before = normalize_before # combo of fairseq's encoder_ and decoder_normalize_before 113 | self.add_final_layer_norm = add_final_layer_norm 114 | 115 | # Params introduced for Marian 116 | self.add_bias_logits = add_bias_logits 117 | self.static_position_embeddings = static_position_embeddings 118 | 119 | # 3 Types of Dropout 120 | self.attention_dropout = attention_dropout 121 | self.activation_dropout = activation_dropout 122 | self.dropout = dropout 123 | 124 | # Classifier stuff 125 | self.classif_dropout = classifier_dropout 126 | 127 | # pos embedding offset 128 | self.extra_pos_embeddings = self.pad_token_id + 1 129 | 130 | @property 131 | def num_attention_heads(self) -> int: 132 | return self.encoder_attention_heads 133 | 134 | @property 135 | def hidden_size(self) -> int: 136 | return self.d_model 137 | 138 | def is_valid_mbart(self) -> bool: 139 | """Is the configuration aligned with the MBART paper.""" 140 | if self.normalize_before and self.add_final_layer_norm and self.scale_embedding: 141 | return True 142 | if self.normalize_before or self.add_final_layer_norm or self.scale_embedding: 143 | logger.info("This configuration is a mixture of MBART and BART settings") 144 | return False 145 | 146 | 147 | class MBartConfig(BartConfig): 148 | model_type = "mbart" 149 | -------------------------------------------------------------------------------- /transformers/commands/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser, Namespace 3 | from logging import getLogger 4 | 5 | from transformers import SingleSentenceClassificationProcessor as Processor 6 | from transformers import TextClassificationPipeline, is_tf_available, is_torch_available 7 | from transformers.commands import BaseTransformersCLICommand 8 | 9 | 10 | if not is_tf_available() and not is_torch_available(): 11 | raise RuntimeError("At least one of PyTorch or TensorFlow 2.0+ should be installed to use CLI training") 12 | 13 | # TF training parameters 14 | USE_XLA = False 15 | USE_AMP = False 16 | 17 | 18 | 
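A minimal offline sketch of two details of the BartConfig class above (illustrative values, not from the original file): the generic hidden_size / num_attention_heads names are read-only aliases of the fairseq-style parameters, and passing hidden_size directly is rejected in favour of d_model.

from transformers import BartConfig

config = BartConfig(d_model=512, encoder_attention_heads=8, decoder_attention_heads=8)
assert config.hidden_size == config.d_model == 512
assert config.num_attention_heads == config.encoder_attention_heads == 8

try:
    BartConfig(hidden_size=512)
except ValueError as err:
    print(err)  # "hidden size is called d_model"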
def train_command_factory(args: Namespace): 19 | """ 20 | Factory function used to instantiate a training command from provided command line arguments. 21 | :return: TrainCommand 22 | """ 23 | return TrainCommand(args) 24 | 25 | 26 | class TrainCommand(BaseTransformersCLICommand): 27 | @staticmethod 28 | def register_subcommand(parser: ArgumentParser): 29 | """ 30 | Register this command to argparse so it's available for the transformers-cli 31 | :param parser: Root parser to register command-specific arguments 32 | :return: 33 | """ 34 | train_parser = parser.add_parser("train", help="CLI tool to train a model on a task.") 35 | 36 | train_parser.add_argument( 37 | "--train_data", 38 | type=str, 39 | required=True, 40 | help="path to train (and optionally evaluation) dataset as a csv with " 41 | "tab separated labels and sentences.", 42 | ) 43 | train_parser.add_argument( 44 | "--column_label", type=int, default=0, help="Column of the dataset csv file with example labels." 45 | ) 46 | train_parser.add_argument( 47 | "--column_text", type=int, default=1, help="Column of the dataset csv file with example texts." 48 | ) 49 | train_parser.add_argument( 50 | "--column_id", type=int, default=2, help="Column of the dataset csv file with example ids." 51 | ) 52 | train_parser.add_argument( 53 | "--skip_first_row", action="store_true", help="Skip the first row of the csv file (headers)." 54 | ) 55 | 56 | train_parser.add_argument("--validation_data", type=str, default="", help="path to validation dataset.") 57 | train_parser.add_argument( 58 | "--validation_split", 59 | type=float, 60 | default=0.1, 61 | help="if validation dataset is not provided, fraction of train dataset " "to use as validation dataset.", 62 | ) 63 | 64 | train_parser.add_argument("--output", type=str, default="./", help="path to save the trained model.") 65 | 66 | train_parser.add_argument( 67 | "--task", type=str, default="text_classification", help="Task to train the model on." 68 | ) 69 | train_parser.add_argument( 70 | "--model", type=str, default="bert-base-uncased", help="Model's name or path to stored model."
71 | ) 72 | train_parser.add_argument("--train_batch_size", type=int, default=32, help="Batch size for training.") 73 | train_parser.add_argument("--valid_batch_size", type=int, default=64, help="Batch size for validation.") 74 | train_parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate.") 75 | train_parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon for Adam optimizer.") 76 | train_parser.set_defaults(func=train_command_factory) 77 | 78 | def __init__(self, args: Namespace): 79 | self.logger = getLogger("transformers-cli/training") 80 | 81 | self.framework = "tf" if is_tf_available() else "torch" 82 | 83 | os.makedirs(args.output, exist_ok=True) 84 | assert os.path.isdir(args.output) 85 | self.output = args.output 86 | 87 | self.column_label = args.column_label 88 | self.column_text = args.column_text 89 | self.column_id = args.column_id 90 | 91 | self.logger.info("Loading {} pipeline for {}".format(args.task, args.model)) 92 | if args.task == "text_classification": 93 | self.pipeline = TextClassificationPipeline.from_pretrained(args.model) 94 | elif args.task == "token_classification": 95 | raise NotImplementedError 96 | elif args.task == "question_answering": 97 | raise NotImplementedError 98 | 99 | self.logger.info("Loading dataset from {}".format(args.train_data)) 100 | self.train_dataset = Processor.create_from_csv( 101 | args.train_data, 102 | column_label=args.column_label, 103 | column_text=args.column_text, 104 | column_id=args.column_id, 105 | skip_first_row=args.skip_first_row, 106 | ) 107 | self.valid_dataset = None 108 | if args.validation_data: 109 | self.logger.info("Loading validation dataset from {}".format(args.validation_data)) 110 | self.valid_dataset = Processor.create_from_csv( 111 | args.validation_data, 112 | column_label=args.column_label, 113 | column_text=args.column_text, 114 | column_id=args.column_id, 115 | skip_first_row=args.skip_first_row, 116 | ) 117 | 118 | self.validation_split = args.validation_split 119 | self.train_batch_size = args.train_batch_size 120 | self.valid_batch_size = args.valid_batch_size 121 | self.learning_rate = args.learning_rate 122 | self.adam_epsilon = args.adam_epsilon 123 | 124 | def run(self): 125 | if self.framework == "tf": 126 | return self.run_tf() 127 | return self.run_torch() 128 | 129 | def run_torch(self): 130 | raise NotImplementedError 131 | 132 | def run_tf(self): 133 | self.pipeline.fit( 134 | self.train_dataset, 135 | validation_data=self.valid_dataset, 136 | validation_split=self.validation_split, 137 | learning_rate=self.learning_rate, 138 | adam_epsilon=self.adam_epsilon, 139 | train_batch_size=self.train_batch_size, 140 | valid_batch_size=self.valid_batch_size, 141 | ) 142 | 143 | # Save trained pipeline 144 | self.pipeline.save_pretrained(self.output) 145 | -------------------------------------------------------------------------------- /transformers/modeling_tf_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
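A hedged sketch of how the TrainCommand above plugs into an argument parser. The real wiring lives in commands/transformers_cli.py (not shown here), so the parser construction below is illustrative; it also assumes PyTorch or TensorFlow is installed, as required by this module's import-time check.

from argparse import ArgumentParser

from transformers.commands.train import TrainCommand, train_command_factory

parser = ArgumentParser("transformers-cli")          # illustrative entry-point name
subparsers = parser.add_subparsers()
TrainCommand.register_subcommand(subparsers)         # adds the "train" sub-parser defined above

args = parser.parse_args(["train", "--train_data", "train.csv"])
assert args.func is train_command_factory
# args.func(args) would build a TrainCommand, whose __init__ loads the pipeline and the CSV
# dataset and whose run() dispatches to run_tf() or run_torch().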
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ TF 2.0 CamemBERT model. """ 17 | 18 | 19 | import logging 20 | 21 | from .configuration_camembert import CamembertConfig 22 | from .file_utils import add_start_docstrings 23 | from .modeling_tf_roberta import ( 24 | TFRobertaForMaskedLM, 25 | TFRobertaForMultipleChoice, 26 | TFRobertaForQuestionAnswering, 27 | TFRobertaForSequenceClassification, 28 | TFRobertaForTokenClassification, 29 | TFRobertaModel, 30 | ) 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ 36 | # See all CamemBERT models at https://huggingface.co/models?filter=camembert 37 | ] 38 | 39 | 40 | CAMEMBERT_START_DOCSTRING = r""" 41 | 42 | .. note:: 43 | 44 | TF 2.0 models accepts two formats as inputs: 45 | 46 | - having all inputs as keyword arguments (like PyTorch models), or 47 | - having all inputs as a list, tuple or dict in the first positional arguments. 48 | 49 | This second option is useful when using :obj:`tf.keras.Model.fit()` method which currently requires having 50 | all the tensors in the first argument of the model call function: :obj:`model(inputs)`. 51 | 52 | If you choose this second option, there are three possibilities you can use to gather all the input Tensors 53 | in the first positional argument : 54 | 55 | - a single Tensor with input_ids only and nothing else: :obj:`model(inputs_ids)` 56 | - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring: 57 | :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])` 58 | - a dictionary with one or several input Tensors associated to the input names given in the docstring: 59 | :obj:`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})` 60 | 61 | Parameters: 62 | config (:class:`~transformers.CamembertConfig`): Model configuration class with all the parameters of the 63 | model. Initializing with a config file does not load the weights associated with the model, only the configuration. 64 | Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. 65 | output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): 66 | If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. 67 | """ 68 | 69 | 70 | @add_start_docstrings( 71 | "The bare CamemBERT Model transformer outputting raw hidden-states without any specific head on top.", 72 | CAMEMBERT_START_DOCSTRING, 73 | ) 74 | class TFCamembertModel(TFRobertaModel): 75 | """ 76 | This class overrides :class:`~transformers.TFRobertaModel`. Please check the 77 | superclass for the appropriate documentation alongside usage examples. 78 | """ 79 | 80 | config_class = CamembertConfig 81 | 82 | 83 | @add_start_docstrings( 84 | """CamemBERT Model with a `language modeling` head on top. """, CAMEMBERT_START_DOCSTRING, 85 | ) 86 | class TFCamembertForMaskedLM(TFRobertaForMaskedLM): 87 | """ 88 | This class overrides :class:`~transformers.TFRobertaForMaskedLM`. 
Please check the 89 | superclass for the appropriate documentation alongside usage examples. 90 | """ 91 | 92 | config_class = CamembertConfig 93 | 94 | 95 | @add_start_docstrings( 96 | """CamemBERT Model transformer with a sequence classification/regression head on top (a linear layer 97 | on top of the pooled output) e.g. for GLUE tasks. """, 98 | CAMEMBERT_START_DOCSTRING, 99 | ) 100 | class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification): 101 | """ 102 | This class overrides :class:`~transformers.TFRobertaForSequenceClassification`. Please check the 103 | superclass for the appropriate documentation alongside usage examples. 104 | """ 105 | 106 | config_class = CamembertConfig 107 | 108 | 109 | @add_start_docstrings( 110 | """CamemBERT Model with a token classification head on top (a linear layer on top of 111 | the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, 112 | CAMEMBERT_START_DOCSTRING, 113 | ) 114 | class TFCamembertForTokenClassification(TFRobertaForTokenClassification): 115 | """ 116 | This class overrides :class:`~transformers.TFRobertaForTokenClassification`. Please check the 117 | superclass for the appropriate documentation alongside usage examples. 118 | """ 119 | 120 | config_class = CamembertConfig 121 | 122 | 123 | @add_start_docstrings( 124 | """CamemBERT Model with a multiple choice classification head on top (a linear layer on top of 125 | the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """, 126 | CAMEMBERT_START_DOCSTRING, 127 | ) 128 | class TFCamembertForMultipleChoice(TFRobertaForMultipleChoice): 129 | """ 130 | This class overrides :class:`~transformers.TFRobertaForMultipleChoice`. Please check the 131 | superclass for the appropriate documentation alongside usage examples. 132 | """ 133 | 134 | config_class = CamembertConfig 135 | 136 | 137 | @add_start_docstrings( 138 | """CamemBERT Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`). """, 139 | CAMEMBERT_START_DOCSTRING, 140 | ) 141 | class TFCamembertForQuestionAnswering(TFRobertaForQuestionAnswering): 142 | """ 143 | This class overrides :class:`~transformers.TFRobertaForQuestionAnswering`. Please check the 144 | superclass for the appropriate documentation alongside usage examples. 145 | """ 146 | 147 | config_class = CamembertConfig 148 | -------------------------------------------------------------------------------- /transformers/modeling_tf_xlm_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 Facebook AI Research and the HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ TF 2.0 XLM-RoBERTa model. 
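A minimal sketch of the three input formats described in CAMEMBERT_START_DOCSTRING above, assuming TensorFlow 2 is installed. It builds a deliberately tiny, randomly initialised model from a config instead of downloading pretrained weights, so the small sizes below are illustrative only.

import tensorflow as tf

from transformers import CamembertConfig, TFCamembertModel

config = CamembertConfig(
    vocab_size=100, hidden_size=32, num_hidden_layers=2, num_attention_heads=2, intermediate_size=64
)
model = TFCamembertModel(config)

input_ids = tf.constant([[5, 6, 7, 8]])
attention_mask = tf.ones_like(input_ids)

out_kwargs = model(input_ids, attention_mask=attention_mask)                   # keyword arguments
out_list = model([input_ids, attention_mask])                                  # list, in the documented order
out_dict = model({"input_ids": input_ids, "attention_mask": attention_mask})   # dict keyed by input names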
""" 17 | 18 | 19 | import logging 20 | 21 | from .configuration_xlm_roberta import XLMRobertaConfig 22 | from .file_utils import add_start_docstrings 23 | from .modeling_tf_roberta import ( 24 | TFRobertaForMaskedLM, 25 | TFRobertaForMultipleChoice, 26 | TFRobertaForQuestionAnswering, 27 | TFRobertaForSequenceClassification, 28 | TFRobertaForTokenClassification, 29 | TFRobertaModel, 30 | ) 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ 36 | # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta 37 | ] 38 | 39 | 40 | XLM_ROBERTA_START_DOCSTRING = r""" 41 | 42 | .. note:: 43 | 44 | TF 2.0 models accepts two formats as inputs: 45 | 46 | - having all inputs as keyword arguments (like PyTorch models), or 47 | - having all inputs as a list, tuple or dict in the first positional arguments. 48 | 49 | This second option is useful when using :obj:`tf.keras.Model.fit()` method which currently requires having 50 | all the tensors in the first argument of the model call function: :obj:`model(inputs)`. 51 | 52 | If you choose this second option, there are three possibilities you can use to gather all the input Tensors 53 | in the first positional argument : 54 | 55 | - a single Tensor with input_ids only and nothing else: :obj:`model(inputs_ids)` 56 | - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring: 57 | :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])` 58 | - a dictionary with one or several input Tensors associated to the input names given in the docstring: 59 | :obj:`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})` 60 | 61 | Parameters: 62 | config (:class:`~transformers.XLMRobertaConfig`): Model configuration class with all the parameters of the 63 | model. Initializing with a config file does not load the weights associated with the model, only the configuration. 64 | Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. 65 | output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): 66 | If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. 67 | """ 68 | 69 | 70 | @add_start_docstrings( 71 | "The bare XLM-RoBERTa Model transformer outputting raw hidden-states without any specific head on top.", 72 | XLM_ROBERTA_START_DOCSTRING, 73 | ) 74 | class TFXLMRobertaModel(TFRobertaModel): 75 | """ 76 | This class overrides :class:`~transformers.TFRobertaModel`. Please check the 77 | superclass for the appropriate documentation alongside usage examples. 78 | """ 79 | 80 | config_class = XLMRobertaConfig 81 | 82 | 83 | @add_start_docstrings( 84 | """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING, 85 | ) 86 | class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM): 87 | """ 88 | This class overrides :class:`~transformers.TFRobertaForMaskedLM`. Please check the 89 | superclass for the appropriate documentation alongside usage examples. 90 | """ 91 | 92 | config_class = XLMRobertaConfig 93 | 94 | 95 | @add_start_docstrings( 96 | """XLM-RoBERTa Model transformer with a sequence classification/regression head on top (a linear layer 97 | on top of the pooled output) e.g. for GLUE tasks. 
""", 98 | XLM_ROBERTA_START_DOCSTRING, 99 | ) 100 | class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassification): 101 | """ 102 | This class overrides :class:`~transformers.TFRobertaForSequenceClassification`. Please check the 103 | superclass for the appropriate documentation alongside usage examples. 104 | """ 105 | 106 | config_class = XLMRobertaConfig 107 | 108 | 109 | @add_start_docstrings( 110 | """XLM-RoBERTa Model with a token classification head on top (a linear layer on top of 111 | the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, 112 | XLM_ROBERTA_START_DOCSTRING, 113 | ) 114 | class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification): 115 | """ 116 | This class overrides :class:`~transformers.TFRobertaForTokenClassification`. Please check the 117 | superclass for the appropriate documentation alongside usage examples. 118 | """ 119 | 120 | config_class = XLMRobertaConfig 121 | 122 | 123 | @add_start_docstrings( 124 | """XLM-RoBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`). """, 125 | XLM_ROBERTA_START_DOCSTRING, 126 | ) 127 | class TFXLMRobertaForQuestionAnswering(TFRobertaForQuestionAnswering): 128 | """ 129 | This class overrides :class:`~transformers.TFRobertaForQuestionAnsweringSimple`. Please check the 130 | superclass for the appropriate documentation alongside usage examples. 131 | """ 132 | 133 | config_class = XLMRobertaConfig 134 | 135 | 136 | @add_start_docstrings( 137 | """Roberta Model with a multiple choice classification head on top (a linear layer on top of 138 | the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """, 139 | XLM_ROBERTA_START_DOCSTRING, 140 | ) 141 | class TFXLMRobertaForMultipleChoice(TFRobertaForMultipleChoice): 142 | """ 143 | This class overrides :class:`~transformers.TFRobertaForMultipleChoice`. Please check the 144 | superclass for the appropriate documentation alongside usage examples. 145 | """ 146 | 147 | config_class = XLMRobertaConfig 148 | -------------------------------------------------------------------------------- /transformers/convert_bart_original_pytorch_checkpoint_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Convert BART checkpoint.""" 16 | 17 | 18 | import argparse 19 | import logging 20 | import os 21 | from pathlib import Path 22 | 23 | import fairseq 24 | import torch 25 | from packaging import version 26 | 27 | from transformers import ( 28 | BartConfig, 29 | BartForConditionalGeneration, 30 | BartForSequenceClassification, 31 | BartModel, 32 | BartTokenizer, 33 | ) 34 | from transformers.modeling_bart import _make_linear_from_emb 35 | 36 | 37 | FAIRSEQ_MODELS = ["bart.large", "bart.large.mnli", "bart.large.cnn", "bart_xsum/model.pt"] 38 | extra_arch = {"bart.large": BartModel, "bart.large.mnli": BartForSequenceClassification} 39 | if version.parse(fairseq.__version__) < version.parse("0.9.0"): 40 | raise Exception("requires fairseq >= 0.9.0") 41 | 42 | 43 | logging.basicConfig(level=logging.INFO) 44 | logger = logging.getLogger(__name__) 45 | 46 | SAMPLE_TEXT = " Hello world! cécé herlolip" 47 | 48 | mnli_rename_keys = [ 49 | ("model.classification_heads.mnli.dense.weight", "classification_head.dense.weight"), 50 | ("model.classification_heads.mnli.dense.bias", "classification_head.dense.bias"), 51 | ("model.classification_heads.mnli.out_proj.weight", "classification_head.out_proj.weight"), 52 | ("model.classification_heads.mnli.out_proj.bias", "classification_head.out_proj.bias"), 53 | ] 54 | 55 | 56 | def remove_ignore_keys_(state_dict): 57 | ignore_keys = [ 58 | "encoder.version", 59 | "decoder.version", 60 | "model.encoder.version", 61 | "model.decoder.version", 62 | "_float_tensor", 63 | ] 64 | for k in ignore_keys: 65 | state_dict.pop(k, None) 66 | 67 | 68 | def rename_key(dct, old, new): 69 | val = dct.pop(old) 70 | dct[new] = val 71 | 72 | 73 | def load_xsum_checkpoint(checkpoint_path): 74 | """Checkpoint path should end in model.pt""" 75 | sd = torch.load(checkpoint_path, map_location="cpu") 76 | hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval() 77 | hub_interface.model.load_state_dict(sd["model"]) 78 | return hub_interface 79 | 80 | 81 | def convert_checkpoint_from_disk(checkpoint_path, **config_kwargs): 82 | state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] 83 | remove_ignore_keys_(state_dict) 84 | vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0] 85 | state_dict["shared.weight"] = state_dict["decoder.embed_tokens.weight"] 86 | mbart_config = BartConfig(vocab_size=vocab_size, **config_kwargs) 87 | model = BartForConditionalGeneration(mbart_config) 88 | model.model.load_state_dict(state_dict) 89 | if hasattr(model, "lm_head"): 90 | model.lm_head = _make_linear_from_emb(model.model.shared) 91 | return model 92 | 93 | 94 | @torch.no_grad() 95 | def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path, hf_checkpoint_name=None): 96 | """ 97 | Copy/paste/tweak model's weights to our BERT structure. 
98 | """ 99 | if not os.path.exists(checkpoint_path): 100 | bart = torch.hub.load("pytorch/fairseq", checkpoint_path).eval() 101 | else: 102 | bart = load_xsum_checkpoint(checkpoint_path) 103 | 104 | bart.model.upgrade_state_dict(bart.model.state_dict()) 105 | if hf_checkpoint_name is None: 106 | hf_checkpoint_name = checkpoint_path.replace(".", "-") 107 | config = BartConfig.from_pretrained(hf_checkpoint_name) 108 | tokens = bart.encode(SAMPLE_TEXT).unsqueeze(0) 109 | tokens2 = BartTokenizer.from_pretrained(hf_checkpoint_name).encode(SAMPLE_TEXT, return_tensors="pt").unsqueeze(0) 110 | assert torch.eq(tokens, tokens2).all() 111 | 112 | if checkpoint_path == "bart.large.mnli": 113 | state_dict = bart.state_dict() 114 | remove_ignore_keys_(state_dict) 115 | state_dict["model.shared.weight"] = state_dict["model.decoder.embed_tokens.weight"] 116 | for src, dest in mnli_rename_keys: 117 | rename_key(state_dict, src, dest) 118 | model = BartForSequenceClassification(config).eval() 119 | model.load_state_dict(state_dict) 120 | fairseq_output = bart.predict("mnli", tokens, return_logits=True) 121 | new_model_outputs = model(tokens)[0] # logits 122 | else: # no classification heads to worry about 123 | state_dict = bart.model.state_dict() 124 | remove_ignore_keys_(state_dict) 125 | state_dict["shared.weight"] = state_dict["decoder.embed_tokens.weight"] 126 | fairseq_output = bart.extract_features(tokens) 127 | if hf_checkpoint_name == "facebook/bart-large": 128 | model = BartModel(config).eval() 129 | model.load_state_dict(state_dict) 130 | new_model_outputs = model(tokens).model[0] 131 | else: 132 | model = BartForConditionalGeneration(config).eval() # an existing summarization ckpt 133 | model.model.load_state_dict(state_dict) 134 | if hasattr(model, "lm_head"): 135 | model.lm_head = _make_linear_from_emb(model.model.shared) 136 | new_model_outputs = model.model(tokens)[0] 137 | 138 | # Check results 139 | assert fairseq_output.shape == new_model_outputs.shape 140 | assert (fairseq_output == new_model_outputs).all().item() 141 | Path(pytorch_dump_folder_path).mkdir(exist_ok=True) 142 | model.save_pretrained(pytorch_dump_folder_path) 143 | 144 | 145 | if __name__ == "__main__": 146 | parser = argparse.ArgumentParser() 147 | # Required parameters 148 | parser.add_argument( 149 | "fairseq_path", type=str, help="bart.large, bart.large.cnn or a path to a model.pt on local filesystem." 150 | ) 151 | parser.add_argument("pytorch_dump_folder_path", default=None, type=str, help="Path to the output PyTorch model.") 152 | parser.add_argument( 153 | "--hf_config", default=None, type=str, help="Which huggingface architecture to use: bart-large-xsum" 154 | ) 155 | args = parser.parse_args() 156 | convert_bart_checkpoint(args.fairseq_path, args.pytorch_dump_folder_path, hf_checkpoint_name=args.hf_config) 157 | -------------------------------------------------------------------------------- /transformers/configuration_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ DistilBERT model configuration """ 16 | 17 | 18 | import logging 19 | 20 | from .configuration_utils import PretrainedConfig 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 26 | "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", 27 | "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-config.json", 28 | "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-cased-config.json", 29 | "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-cased-distilled-squad-config.json", 30 | "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-config.json", 31 | "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-multilingual-cased-config.json", 32 | "distilbert-base-uncased-finetuned-sst-2-english": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json", 33 | } 34 | 35 | 36 | class DistilBertConfig(PretrainedConfig): 37 | r""" 38 | This is the configuration class to store the configuration of a :class:`~transformers.DistilBertModel`. 39 | It is used to instantiate a DistilBERT model according to the specified arguments, defining the model 40 | architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of 41 | the DistilBERT `distilbert-base-uncased `__ architecture. 42 | 43 | Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used 44 | to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig` 45 | for more information. 46 | 47 | 48 | Args: 49 | vocab_size (:obj:`int`, optional, defaults to 30522): 50 | Vocabulary size of the DistilBERT model. Defines the different tokens that 51 | can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.BertModel`. 52 | max_position_embeddings (:obj:`int`, optional, defaults to 512): 53 | The maximum sequence length that this model might ever be used with. 54 | Typically set this to something large just in case (e.g., 512 or 1024 or 2048). 55 | sinusoidal_pos_embds (:obj:`boolean`, optional, defaults to :obj:`False`): 56 | Whether to use sinusoidal positional embeddings. 57 | n_layers (:obj:`int`, optional, defaults to 6): 58 | Number of hidden layers in the Transformer encoder. 59 | n_heads (:obj:`int`, optional, defaults to 12): 60 | Number of attention heads for each attention layer in the Transformer encoder. 61 | dim (:obj:`int`, optional, defaults to 768): 62 | Dimensionality of the encoder layers and the pooler layer. 63 | hidden_dim (:obj:`int`, optional, defaults to 3072): 64 | The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. 
65 | dropout (:obj:`float`, optional, defaults to 0.1): 66 | The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler. 67 | attention_dropout (:obj:`float`, optional, defaults to 0.1): 68 | The dropout ratio for the attention probabilities. 69 | activation (:obj:`str` or :obj:`function`, optional, defaults to "gelu"): 70 | The non-linear activation function (function or string) in the encoder and pooler. 71 | If string, "gelu", "relu", "swish" and "gelu_new" are supported. 72 | initializer_range (:obj:`float`, optional, defaults to 0.02): 73 | The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 74 | qa_dropout (:obj:`float`, optional, defaults to 0.1): 75 | The dropout probabilities used in the question answering model 76 | :class:`~transformers.DistilBertForQuestionAnswering`. 77 | seq_classif_dropout (:obj:`float`, optional, defaults to 0.2): 78 | The dropout probabilities used in the sequence classification and the multiple choice model 79 | :class:`~transformers.DistilBertForSequenceClassification`. 80 | 81 | Example:: 82 | 83 | >>> from transformers import DistilBertModel, DistilBertConfig 84 | 85 | >>> # Initializing a DistilBERT configuration 86 | >>> configuration = DistilBertConfig() 87 | 88 | >>> # Initializing a model from the configuration 89 | >>> model = DistilBertModel(configuration) 90 | 91 | >>> # Accessing the model configuration 92 | >>> configuration = model.config 93 | """ 94 | model_type = "distilbert" 95 | 96 | def __init__( 97 | self, 98 | vocab_size=30522, 99 | max_position_embeddings=512, 100 | sinusoidal_pos_embds=False, 101 | n_layers=6, 102 | n_heads=12, 103 | dim=768, 104 | hidden_dim=4 * 768, 105 | dropout=0.1, 106 | attention_dropout=0.1, 107 | activation="gelu", 108 | initializer_range=0.02, 109 | qa_dropout=0.1, 110 | seq_classif_dropout=0.2, 111 | pad_token_id=0, 112 | **kwargs 113 | ): 114 | super().__init__(**kwargs, pad_token_id=pad_token_id) 115 | self.vocab_size = vocab_size 116 | self.max_position_embeddings = max_position_embeddings 117 | self.sinusoidal_pos_embds = sinusoidal_pos_embds 118 | self.n_layers = n_layers 119 | self.n_heads = n_heads 120 | self.dim = dim 121 | self.hidden_dim = hidden_dim 122 | self.dropout = dropout 123 | self.attention_dropout = attention_dropout 124 | self.activation = activation 125 | self.initializer_range = initializer_range 126 | self.qa_dropout = qa_dropout 127 | self.seq_classif_dropout = seq_classif_dropout 128 | 129 | @property 130 | def hidden_size(self): 131 | return self.dim 132 | 133 | @property 134 | def num_attention_heads(self): 135 | return self.n_heads 136 | 137 | @property 138 | def num_hidden_layers(self): 139 | return self.n_layers 140 | --------------------------------------------------------------------------------
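A minimal offline sketch of the property aliases at the bottom of DistilBertConfig above: DistilBERT keeps its own parameter names (dim, n_heads, n_layers) while exposing the generic names used elsewhere in the library.

from transformers import DistilBertConfig

config = DistilBertConfig()  # defaults as documented above
assert config.hidden_size == config.dim == 768
assert config.num_attention_heads == config.n_heads == 12
assert config.num_hidden_layers == config.n_layers == 6
assert config.hidden_dim == 4 * 768  # feed-forward size keeps its DistilBERT-specific name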