├── fairseq ├── scripts │ ├── __init__.py │ ├── spm_train.py │ ├── compound_split_bleu.sh │ ├── sacrebleu_pregen.sh │ ├── convert_dictionary.lua │ ├── compare_namespaces.py │ ├── spm_decode.py │ ├── read_binarized.py │ └── shard_docs.py ├── tests │ ├── __init__.py │ ├── speech_recognition │ │ ├── __init__.py │ │ └── test_cross_entropy.py │ ├── test_iterators.py │ ├── test_character_token_embedder.py │ └── test_convtbc.py ├── fairseq_cli │ ├── __init__.py │ ├── eval_lm.py │ ├── score.py │ ├── setup.py │ ├── train.py │ ├── generate.py │ ├── interactive.py │ └── preprocess.py ├── fairseq │ ├── data │ │ ├── audio │ │ │ └── __init__.py │ │ ├── num_samples_dataset.py │ │ ├── id_dataset.py │ │ ├── legacy │ │ │ ├── __init__.py │ │ │ └── masked_lm_dictionary.py │ │ ├── offset_tokens_dataset.py │ │ ├── strip_token_dataset.py │ │ ├── encoders │ │ │ ├── space_tokenizer.py │ │ │ ├── nltk_tokenizer.py │ │ │ ├── __init__.py │ │ │ ├── sentencepiece_bpe.py │ │ │ ├── fastbpe.py │ │ │ ├── gpt2_bpe.py │ │ │ └── subword_nmt_bpe.py │ │ ├── raw_label_dataset.py │ │ ├── lru_cache_dataset.py │ │ ├── sentence_order_label_dataset.py │ │ ├── sort_dataset.py │ │ ├── list_dataset.py │ │ ├── numel_dataset.py │ │ ├── colorize_dataset.py │ │ ├── pad_dataset.py │ │ ├── truncate_dataset.py │ │ ├── prepend_dataset.py │ │ ├── prepend_token_dataset.py │ │ ├── replace_dataset.py │ │ ├── base_wrapper_dataset.py │ │ └── concat_sentences_dataset.py │ ├── modules │ │ ├── lightconv_layer │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── lightconv_cuda.cpp │ │ ├── dynamicconv_layer │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ ├── dynamiconv_cpu.cpp │ │ │ ├── dynamicconv_cuda.cuh │ │ │ └── dynamicconv_cuda.cpp │ │ ├── grad_multiply.py │ │ ├── unfold.py │ │ ├── layer_norm.py │ │ ├── gelu.py │ │ ├── logsumexp_moe.py │ │ ├── scalar_bias.py │ │ ├── positional_embedding.py │ │ ├── conv_tbc.py │ │ └── sparse_transformer_sentence_encoder_layer.py │ ├── models │ │ ├── roberta │ │ │ └── __init__.py │ │ ├── fairseq_encoder.py │ │ └── model_utils.py │ ├── tokenizer.py │ ├── __init__.py │ ├── criterions │ │ ├── __init__.py │ │ └── fairseq_criterion.py │ ├── optim │ │ ├── lr_scheduler │ │ │ ├── __init__.py │ │ │ └── fairseq_lr_scheduler.py │ │ ├── __init__.py │ │ ├── adagrad.py │ │ ├── sgd.py │ │ └── adadelta.py │ ├── clib │ │ └── libbleu │ │ │ └── module.cpp │ ├── tasks │ │ └── translation_from_pretrained_xlm.py │ └── pdb.py ├── examples │ ├── .gitignore │ ├── speech_recognition │ │ ├── __init__.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── tasks │ │ │ └── __init__.py │ │ ├── data │ │ │ └── __init__.py │ │ └── criterions │ │ │ └── __init__.py │ ├── noisychannel │ │ └── __init__.py │ ├── __init__.py │ ├── roberta │ │ ├── commonsense_qa │ │ │ ├── __init__.py │ │ │ └── download_cqa_data.sh │ │ └── wsc │ │ │ └── __init__.py │ ├── language_model │ │ ├── prepare-wikitext-103.sh │ │ ├── transformer_lm │ │ │ └── README.md │ │ └── conv_lm │ │ │ └── README.md │ └── backtranslation │ │ └── README.md ├── docs │ ├── docutils.conf │ ├── requirements.txt │ ├── _static │ │ └── theme_overrides.css │ ├── modules.rst │ ├── Makefile │ ├── criterions.rst │ ├── make.bat │ ├── optim.rst │ ├── lr_scheduler.rst │ ├── index.rst │ ├── data.rst │ └── tasks.rst ├── fairseq.gif ├── fairseq_logo.png ├── hubconf.py ├── LICENSE └── CONTRIBUTING.md ├── style_paraphrase ├── __init__.py ├── logs │ └── .gitkeep ├── runs │ └── .gitkeep ├── saved_models │ └── .gitkeep ├── slurm-schedulers │ └── .gitkeep ├── evaluation │ ├── similarity │ │ ├── __init__.py │ │ ├── spm.py │ │ └── 
test_sim.py │ ├── .gitignore │ ├── human │ │ ├── crowdsourcing.png │ │ ├── crowdsourcing2.png │ │ ├── crowdsourcing3.png │ │ └── crowdsourcing4.png │ └── scripts │ │ ├── flip_labels.py │ │ ├── eval_shakespeare_baselines.sh │ │ └── eval_formality_baselines.sh ├── style_classify │ ├── webapp │ │ ├── static │ │ │ └── .gitkeep │ │ ├── run.sh │ │ └── templates │ │ │ └── visual.html │ └── .gitignore ├── .gitignore ├── examples │ ├── test_custom_0.sh │ ├── run_finetune_inverse_paraphrase.sh │ ├── formality │ │ ├── run_finetune_formality_0.sh │ │ └── run_finetune_formality_1.sh │ ├── run_evaluate_paraphrase.sh │ ├── shakespeare │ │ ├── run_finetune_shakespeare_0.sh │ │ └── run_finetune_shakespeare_1.sh │ └── run_finetune_paraphrase.sh ├── run_evaluate_gpt2_template.sh ├── hyperparameters_config.py └── run_finetune_gpt2_template.sh ├── outputs ├── shakespeare │ └── generated_p_09.txt └── cds │ └── README.md ├── transformers ├── transformers │ ├── tests │ │ ├── __init__.py │ │ ├── fixtures │ │ │ ├── input.txt │ │ │ └── test_sentencepiece.model │ │ └── conftest.py │ ├── data │ │ ├── processors │ │ │ └── __init__.py │ │ └── __init__.py │ └── configuration_camembert.py ├── MANIFEST.in ├── docs │ ├── source │ │ ├── examples.md │ │ ├── _static │ │ │ └── css │ │ │ │ ├── Calibre-Thin.otf │ │ │ │ ├── Calibre-Light.ttf │ │ │ │ ├── Calibre-Medium.otf │ │ │ │ ├── Calibre-Regular.otf │ │ │ │ └── code-snippets.css │ │ ├── imgs │ │ │ ├── transformers_logo_name.png │ │ │ ├── warmup_constant_schedule.png │ │ │ ├── warmup_cosine_schedule.png │ │ │ ├── warmup_linear_schedule.png │ │ │ ├── warmup_cosine_hard_restarts_schedule.png │ │ │ └── warmup_cosine_warm_restarts_schedule.png │ │ ├── main_classes │ │ │ ├── configuration.rst │ │ │ ├── model.rst │ │ │ ├── tokenizer.rst │ │ │ └── optimizer_schedules.rst │ │ ├── model_doc │ │ │ ├── transformerxl.rst │ │ │ ├── auto.rst │ │ │ ├── gpt2.rst │ │ │ ├── ctrl.rst │ │ │ ├── roberta.rst │ │ │ ├── gpt.rst │ │ │ ├── xlm.rst │ │ │ ├── distilbert.rst │ │ │ └── xlnet.rst │ │ ├── bertology.rst │ │ └── notebooks.rst │ ├── Makefile │ └── requirements.txt ├── examples │ ├── requirements.txt │ ├── tests_samples │ │ ├── .gitignore │ │ └── MRPC │ │ │ ├── dev.tsv │ │ │ └── train.tsv │ ├── distillation │ │ ├── requirements.txt │ │ └── training_configs │ │ │ ├── distilgpt2.json │ │ │ └── distilbert-base-uncased.json │ └── contrib │ │ └── README.md ├── docker │ └── Dockerfile ├── .coveragerc ├── requirements.txt ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── question-help.md │ │ ├── ---new-benchmark.md │ │ ├── --new-model-addition.md │ │ ├── feature-request.md │ │ ├── bug-report.md │ │ └── migration.md │ └── stale.yml ├── templates │ └── adding_a_new_example_script │ │ └── README.md ├── .circleci │ └── deploy.sh └── requirements-dev.txt ├── datasets ├── .gitignore ├── bpe2text.py ├── bpe2binary.sh ├── dataset2bpe.py └── prepare_paraphrase_data.py ├── mturk_evals ├── formality_gold_vs_generated_baseline_he_2020 │ ├── full_disagreement.csv │ └── label_all_110_150.csv ├── formality_gold_vs_generated_baseline_unmt │ ├── full_disagreement.csv │ └── mturk_batches │ │ └── batch_2.csv ├── shakespeare_gold_vs_generated_baseline_he_2020 │ ├── full_disagreement.csv │ └── label_all_117_150.csv ├── shakespeare_gold_vs_generated_baseline_unmt │ ├── full_disagreement.csv │ └── mturk_batches │ │ └── batch_2.csv ├── formality_gold_vs_generated_single_nucleus_paraphrase_0.0 │ └── full_disagreement.csv ├── formality_gold_vs_generated_single_nucleus_paraphrase_0.9 │ └── full_disagreement.csv ├── 
shakespeare_gold_vs_generated_single_model_nucleus_paraphrase │ ├── full_disagreement.csv │ ├── mturk_batches │ │ └── batch_3.csv │ └── label_all_104_150.csv ├── shakespeare_gold_vs_generated_single_nucleus_paraphrase_0.0 │ └── full_disagreement.csv └── formality_gold_vs_generated_single_nucleus_paraphrase │ └── full_disagreement.csv ├── web-demo ├── config.json ├── strap-frontend │ ├── src │ │ ├── url.js │ │ ├── j.d.ttf │ │ ├── index.js │ │ └── app.css │ ├── public │ │ ├── favicon.ico │ │ ├── manifest.json │ │ └── index.html │ ├── .gitignore │ └── package.json ├── strap-landing │ ├── john.jpg │ ├── mohit.png │ ├── thumb.png │ └── kalpesh.jpg ├── strap-backend │ └── waitress_server.py ├── setup.sh ├── clean_queue.py └── LICENSE ├── setup.py ├── data_samples └── README.md ├── .gitignore ├── LICENSE ├── paraphrase_many.py ├── demo_paraphraser.py ├── requirements.txt └── README-multilingual.md /fairseq/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_paraphrase/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/fairseq_cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_paraphrase/logs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_paraphrase/runs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/audio/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /outputs/shakespeare/generated_p_09.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_paraphrase/saved_models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/fairseq_cli/eval_lm.py: -------------------------------------------------------------------------------- 1 | ../eval_lm.py -------------------------------------------------------------------------------- /fairseq/fairseq_cli/score.py: -------------------------------------------------------------------------------- 1 | ../score.py -------------------------------------------------------------------------------- /fairseq/fairseq_cli/setup.py: -------------------------------------------------------------------------------- 1 | ../setup.py -------------------------------------------------------------------------------- /fairseq/fairseq_cli/train.py: -------------------------------------------------------------------------------- 1 | ../train.py -------------------------------------------------------------------------------- 
/style_paraphrase/slurm-schedulers/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transformers/transformers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/fairseq_cli/generate.py: -------------------------------------------------------------------------------- 1 | ../generate.py -------------------------------------------------------------------------------- /fairseq/tests/speech_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transformers/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /fairseq/examples/.gitignore: -------------------------------------------------------------------------------- 1 | !*/*.sh 2 | !*/*.md 3 | -------------------------------------------------------------------------------- /fairseq/fairseq_cli/interactive.py: -------------------------------------------------------------------------------- 1 | ../interactive.py -------------------------------------------------------------------------------- /fairseq/fairseq_cli/preprocess.py: -------------------------------------------------------------------------------- 1 | ../preprocess.py -------------------------------------------------------------------------------- /style_paraphrase/evaluation/similarity/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/similarity/spm.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /style_paraphrase/style_classify/webapp/static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fairseq/docs/docutils.conf: -------------------------------------------------------------------------------- 1 | [writers] 2 | option-limit=0 3 | -------------------------------------------------------------------------------- /transformers/docs/source/examples.md: -------------------------------------------------------------------------------- 1 | ../../examples/README.md -------------------------------------------------------------------------------- /fairseq/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx<2.0 2 | sphinx-argparse 3 | -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | paranmt_filtered 2 | shakespeare 3 | formality 4 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_baseline_he_2020/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /style_paraphrase/.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | runs 3 | saved_models 4 | slurm-schedulers 5 | -------------------------------------------------------------------------------- /web-demo/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "output_dir": "/data/kalpesh/style_paraphrase_data" 3 | } -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_baseline_unmt/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /style_paraphrase/style_classify/.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | saved_models 3 | slurm-schedulers 4 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_baseline_he_2020/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_baseline_unmt/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /fairseq/examples/speech_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tasks, criterions, models # noqa 2 | -------------------------------------------------------------------------------- /transformers/examples/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | tensorboard 3 | scikit-learn 4 | seqeval 5 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_single_nucleus_paraphrase_0.0/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_single_nucleus_paraphrase_0.9/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /transformers/transformers/tests/fixtures/input.txt: -------------------------------------------------------------------------------- 1 | Who was Jim Henson ? 
||| Jim Henson was a puppeteer 2 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_single_model_nucleus_paraphrase/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_single_nucleus_paraphrase_0.0/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | -------------------------------------------------------------------------------- /transformers/examples/tests_samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.* 2 | cache* 3 | temp* 4 | !*.tsv 5 | !*.json 6 | !.gitignore -------------------------------------------------------------------------------- /fairseq/fairseq.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/fairseq/fairseq.gif -------------------------------------------------------------------------------- /web-demo/strap-frontend/src/url.js: -------------------------------------------------------------------------------- 1 | const SERVER_URL = "http://arkham.cs.umass.edu:8554" 2 | 3 | export default SERVER_URL; 4 | -------------------------------------------------------------------------------- /fairseq/fairseq_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/fairseq/fairseq_logo.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='style_paraphrase', version='1.0', packages=find_packages()) 4 | -------------------------------------------------------------------------------- /web-demo/strap-landing/john.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-landing/john.jpg -------------------------------------------------------------------------------- /web-demo/strap-landing/mohit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-landing/mohit.png -------------------------------------------------------------------------------- /web-demo/strap-landing/thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-landing/thumb.png -------------------------------------------------------------------------------- /web-demo/strap-frontend/src/j.d.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-frontend/src/j.d.ttf -------------------------------------------------------------------------------- /web-demo/strap-landing/kalpesh.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-landing/kalpesh.jpg -------------------------------------------------------------------------------- /web-demo/strap-frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/web-demo/strap-frontend/public/favicon.ico -------------------------------------------------------------------------------- /transformers/examples/distillation/requirements.txt: -------------------------------------------------------------------------------- 1 | gitpython==3.0.2 2 | tensorboard>=1.14.0 3 | tensorboardX==1.8 4 | psutil==5.6.3 5 | scipy==1.3.1 6 | transformers==2.0.0 7 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/.gitignore: -------------------------------------------------------------------------------- 1 | similarity/sim 2 | fluency/cola_classifier 3 | accuracy/shakespeare_classifier 4 | accuracy/formality_classifier 5 | accuracy/cds_classifier 6 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/human/crowdsourcing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/style_paraphrase/evaluation/human/crowdsourcing.png -------------------------------------------------------------------------------- /style_paraphrase/evaluation/human/crowdsourcing2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/style_paraphrase/evaluation/human/crowdsourcing2.png -------------------------------------------------------------------------------- /style_paraphrase/evaluation/human/crowdsourcing3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/style_paraphrase/evaluation/human/crowdsourcing3.png -------------------------------------------------------------------------------- /style_paraphrase/evaluation/human/crowdsourcing4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/style_paraphrase/evaluation/human/crowdsourcing4.png -------------------------------------------------------------------------------- /transformers/docs/source/_static/css/Calibre-Thin.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/_static/css/Calibre-Thin.otf -------------------------------------------------------------------------------- /transformers/docs/source/_static/css/Calibre-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/_static/css/Calibre-Light.ttf -------------------------------------------------------------------------------- /transformers/docs/source/_static/css/Calibre-Medium.otf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/_static/css/Calibre-Medium.otf -------------------------------------------------------------------------------- /transformers/docs/source/_static/css/Calibre-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/_static/css/Calibre-Regular.otf -------------------------------------------------------------------------------- /transformers/docs/source/imgs/transformers_logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/transformers_logo_name.png -------------------------------------------------------------------------------- /transformers/docs/source/imgs/warmup_constant_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/warmup_constant_schedule.png -------------------------------------------------------------------------------- /transformers/docs/source/imgs/warmup_cosine_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/warmup_cosine_schedule.png -------------------------------------------------------------------------------- /transformers/docs/source/imgs/warmup_linear_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/warmup_linear_schedule.png -------------------------------------------------------------------------------- /transformers/transformers/tests/fixtures/test_sentencepiece.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/transformers/tests/fixtures/test_sentencepiece.model -------------------------------------------------------------------------------- /transformers/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png -------------------------------------------------------------------------------- /transformers/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martiansideofthemoon/style-transfer-paraphrase/HEAD/transformers/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png -------------------------------------------------------------------------------- /web-demo/strap-backend/waitress_server.py: -------------------------------------------------------------------------------- 1 | import app 2 | from waitress import serve 3 | from paste.translogger import TransLogger 4 | serve(TransLogger(app.app, setup_console_handler=False), port=8554, host="0.0.0.0") 5 | 
-------------------------------------------------------------------------------- /transformers/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:latest 2 | 3 | RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext 4 | 5 | RUN pip install transformers 6 | 7 | WORKDIR /workspace -------------------------------------------------------------------------------- /transformers/transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import InputExample, InputFeatures, DataProcessor 2 | from .glue import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features 3 | 4 | -------------------------------------------------------------------------------- /fairseq/docs/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | .wy-table-responsive table td kbd { 2 | white-space: nowrap; 3 | } 4 | .wy-table-responsive table td { 5 | white-space: normal !important; 6 | } 7 | .wy-table-responsive { 8 | overflow: visible !important; 9 | } 10 | -------------------------------------------------------------------------------- /fairseq/examples/noisychannel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .rerank_options import * # noqa 7 | -------------------------------------------------------------------------------- /transformers/examples/distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "initializer_range": 0.02, 3 | "layer_norm_epsilon": 0.00001, 4 | "n_ctx": 1024, 5 | "n_embd": 768, 6 | "n_head": 12, 7 | "n_layer": 6, 8 | "n_positions": 1024, 9 | "vocab_size": 50257 10 | } -------------------------------------------------------------------------------- /transformers/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=transformers 3 | omit = 4 | # skip convertion scripts from testing for now 5 | */convert_* 6 | */__main__.py 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise 11 | except 12 | register_parameter -------------------------------------------------------------------------------- /fairseq/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | __version__ = '0.8.0' 7 | 8 | import examples.noisychannel # noqa 9 | -------------------------------------------------------------------------------- /fairseq/examples/roberta/commonsense_qa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . 
import commonsense_qa_task # noqa 7 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/lightconv_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightconv_layer import LightconvLayer # noqa 7 | -------------------------------------------------------------------------------- /transformers/requirements.txt: -------------------------------------------------------------------------------- 1 | # progress bars in model download and training scripts 2 | tqdm 3 | # Accessing files from S3 directly. 4 | boto3 5 | # Used for downloading models over HTTP 6 | requests 7 | # For OpenAI GPT 8 | regex 9 | # For XLNet 10 | sentencepiece 11 | # For XLM 12 | sacremoses -------------------------------------------------------------------------------- /fairseq/docs/modules.rst: -------------------------------------------------------------------------------- 1 | Modules 2 | ======= 3 | 4 | Fairseq provides several stand-alone :class:`torch.nn.Module` classes that may 5 | be helpful when implementing a new :class:`~fairseq.models.BaseFairseqModel`. 6 | 7 | .. automodule:: fairseq.modules 8 | :members: 9 | :undoc-members: 10 | -------------------------------------------------------------------------------- /fairseq/examples/roberta/wsc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import wsc_criterion # noqa 7 | from . import wsc_task # noqa 8 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/dynamicconv_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .dynamicconv_layer import DynamicconvLayer # noqa 7 | -------------------------------------------------------------------------------- /fairseq/fairseq/models/roberta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from .hub_interface import * # noqa 7 | from .model import * # noqa 8 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/question-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Questions & Help" 3 | about: Start a general discussion related to PyTorch Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❓ Questions & Help 11 | 12 | 13 | -------------------------------------------------------------------------------- /fairseq/examples/speech_recognition/models/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | for file in os.listdir(os.path.dirname(__file__)): 5 | if file.endswith('.py') and not file.startswith('_'): 6 | model_name = file[:file.find('.py')] 7 | importlib.import_module('examples.speech_recognition.models.' + model_name) 8 | -------------------------------------------------------------------------------- /fairseq/examples/speech_recognition/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | for file in os.listdir(os.path.dirname(__file__)): 5 | if file.endswith('.py') and not file.startswith('_'): 6 | task_name = file[:file.find('.py')] 7 | importlib.import_module('examples.speech_recognition.tasks.' + task_name) 8 | -------------------------------------------------------------------------------- /fairseq/examples/speech_recognition/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .asr_dataset import AsrDataset 7 | 8 | __all__ = [ 9 | 'AsrDataset', 10 | ] 11 | -------------------------------------------------------------------------------- /transformers/examples/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Community contributed examples 2 | 3 | This folder contains examples which are not actively maintained (mostly contributed by the community). 4 | 5 | Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working. 6 | -------------------------------------------------------------------------------- /fairseq/examples/speech_recognition/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | for file in os.listdir(os.path.dirname(__file__)): 5 | if file.endswith('.py') and not file.startswith('_'): 6 | criterion_name = file[:file.find('.py')] 7 | importlib.import_module('examples.speech_recognition.criterions.' 
+ criterion_name) 8 | -------------------------------------------------------------------------------- /transformers/docs/source/_static/css/code-snippets.css: -------------------------------------------------------------------------------- 1 | 2 | .highlight .c1, .highlight .sd{ 3 | color: #999 4 | } 5 | 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc { 7 | color: #FB8D68; 8 | } 9 | 10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow { 11 | color: #6670FF; 12 | } -------------------------------------------------------------------------------- /transformers/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .processors import InputExample, InputFeatures, DataProcessor 2 | from .processors import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features 3 | 4 | from .metrics import is_sklearn_available 5 | if is_sklearn_available(): 6 | from .metrics import glue_compute_metrics 7 | -------------------------------------------------------------------------------- /web-demo/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export OUTPUT_DIR=/data/kalpesh/style_paraphrase_data 4 | 5 | mkdir -p $OUTPUT_DIR/generated_outputs/queue 6 | mkdir -p $OUTPUT_DIR/generated_outputs/inputs 7 | mkdir -p $OUTPUT_DIR/generated_outputs/final 8 | touch $OUTPUT_DIR/generated_outputs/queue/queue.txt 9 | 10 | cd strap-frontend 11 | npm install 12 | cd .. 13 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import 'bootstrap/dist/css/bootstrap.min.css'; 4 | import "bootstrap-slider/dist/css/bootstrap-slider.css" 5 | import './index.css'; 6 | import './toggle.css' 7 | import SquashDemo from './app'; 8 | 9 | 10 | ReactDOM.render(<SquashDemo />, document.getElementById('root')); 11 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_single_model_nucleus_paraphrase/mturk_batches/batch_3.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | Is the worst well ?,Is 't the worst ? 3 | "Why , how now , Juliet ?","What , now , Juliet ?" 4 | I won't miss any chance to send my love to you .,Not to my marriage will I omit A chance to let my love speak . 5 | Tis late .,That's too late . 6 | "Monday , my lord .","Sia . , Monday , my lord ."
7 | -------------------------------------------------------------------------------- /transformers/examples/distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "activation": "gelu", 3 | "attention_dropout": 0.1, 4 | "dim": 768, 5 | "dropout": 0.1, 6 | "hidden_dim": 3072, 7 | "initializer_range": 0.02, 8 | "max_position_embeddings": 512, 9 | "n_heads": 12, 10 | "n_layers": 6, 11 | "sinusoidal_pos_embds": true, 12 | "tie_weights_": true, 13 | "vocab_size": 30522 14 | } 15 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": ".", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /fairseq/fairseq/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | SPACE_NORMALIZER = re.compile(r"\s+") 9 | 10 | 11 | def tokenize_line(line): 12 | line = SPACE_NORMALIZER.sub(" ", line) 13 | line = line.strip() 14 | return line.split() 15 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /web-demo/clean_queue.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | 4 | with open("config.json", "r") as f: 5 | configuration = json.loads(f.read()) 6 | OUTPUT_DIR = configuration["output_dir"] 7 | 8 | command = "rm {}/generated_outputs/queue/queue.txt".format(OUTPUT_DIR) 9 | print(subprocess.check_output(command, shell=True)) 10 | 11 | command = "touch {}/generated_outputs/queue/queue.txt".format(OUTPUT_DIR) 12 | print(subprocess.check_output(command, shell=True)) 13 | -------------------------------------------------------------------------------- /outputs/cds/README.md: -------------------------------------------------------------------------------- 1 | ## Style Transferred Outputs 2 | 3 | We style transfer each of the 1000 sentences in [`../data_samples`](../data_samples) to every other style and provide the outputs. These HTML files are best viewed using a browser. **WARNING**: These samples have not been filtered by profanity / toxicity and sentences might contain expletives or disturbing content since the original dataset contained them. 
This is primarily due to the nature of our training dataset and pretraining data of GPT2. 4 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_baseline_unmt/mturk_batches/batch_2.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | "Goodbye , Goodbye !","Adieu , dispatch ." 3 | Do not deny to him that you love me .,Don't let him know that you would love me . 4 | "But a lot of old people act like they're already dead , sluggish , slow , fat , and colorless , like lead .","But a proper grace of an old age , Who , like pale , and newly washed With variable objects , and not base , and peas , in paying a place ." 5 | Villain am I none .,"Fine , I am ." 6 | -------------------------------------------------------------------------------- /transformers/docs/source/main_classes/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ---------------------------------------------------- 3 | 4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PretrainedConfig`` 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | .. autoclass:: transformers.PretrainedConfig 10 | :members: 11 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/num_samples_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import FairseqDataset 7 | 8 | 9 | class NumSamplesDataset(FairseqDataset): 10 | 11 | def __getitem__(self, index): 12 | return 1 13 | 14 | def __len__(self): 15 | return 0 16 | 17 | def collater(self, samples): 18 | return sum(samples) 19 | -------------------------------------------------------------------------------- /fairseq/scripts/spm_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | from __future__ import absolute_import, division, print_function, unicode_literals 9 | 10 | import sys 11 | 12 | import sentencepiece as spm 13 | 14 | 15 | if __name__ == "__main__": 16 | spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) 17 | -------------------------------------------------------------------------------- /fairseq/fairseq/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
5 | 6 | __all__ = ['pdb'] 7 | __version__ = '0.8.0' 8 | 9 | import fairseq.criterions # noqa 10 | import fairseq.models # noqa 11 | import fairseq.modules # noqa 12 | import fairseq.optim # noqa 13 | import fairseq.optim.lr_scheduler # noqa 14 | import fairseq.pdb # noqa 15 | import fairseq.tasks # noqa 16 | -------------------------------------------------------------------------------- /data_samples/README.md: -------------------------------------------------------------------------------- 1 | ## The Corpus of Diverse Styles 2 | 3 | In this folder we present some samples from our new dataset, the Corpus of Diverse Styles. We have 15 million sentences and 11 diverse styles in our dataset. 4 | 5 | We present 1000 sentences from each of our eleven diverse styles in this folder. **WARNING**: These samples have not been filtered by profanity / toxicity and some sentences contain expletives or disturbing content. We recognize this issue with the dataset and the potential issues models trained on this data could have. 6 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/id_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . import FairseqDataset 9 | 10 | 11 | class IdDataset(FairseqDataset): 12 | 13 | def __getitem__(self, index): 14 | return index 15 | 16 | def __len__(self): 17 | return 0 18 | 19 | def collater(self, samples): 20 | return torch.tensor(samples) 21 | -------------------------------------------------------------------------------- /fairseq/scripts/compound_split_bleu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "usage: $0 GENERATE_PY_OUTPUT" 5 | exit 1 6 | fi 7 | 8 | GEN=$1 9 | 10 | SYS=$GEN.sys 11 | REF=$GEN.ref 12 | 13 | if [ $(tail -n 1 $GEN | grep BLEU | wc -l) -ne 1 ]; then 14 | echo "not done generating" 15 | exit 16 | fi 17 | 18 | grep ^H $GEN | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $SYS 19 | grep ^T $GEN | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $REF 20 | fairseq-score --sys $SYS --ref $REF 21 | -------------------------------------------------------------------------------- /transformers/templates/adding_a_new_example_script/README.md: -------------------------------------------------------------------------------- 1 | # How to add a new example script in 🤗Transformers 2 | 3 | This folder provides a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library. 4 | 5 | Currently only examples for PyTorch are provided; these are adaptations of the library's SQuAD examples and implement single-GPU and distributed training with gradient accumulation and mixed-precision (using NVIDIA's apex library) to cover a reasonable range of use cases. 6 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/legacy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
5 | 6 | from .masked_lm_dictionary import BertDictionary, MaskedLMDictionary 7 | from .block_pair_dataset import BlockPairDataset 8 | from .masked_lm_dataset import MaskedLMDataset 9 | 10 | __all__ = [ 11 | 'BertDictionary', 12 | 'BlockPairDataset', 13 | 'MaskedLMDataset', 14 | 'MaskedLMDictionary', 15 | ] 16 | -------------------------------------------------------------------------------- /style_paraphrase/style_classify/webapp/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=job_author_webapp 3 | #SBATCH -o /mnt/nfs/work1/miyyer/kalpesh/projects/style-embeddings/author-classify/webapp/log.txt 4 | #SBATCH --time=168:00:00 5 | #SBATCH --partition=titanx-long 6 | #SBATCH --gres=gpu:0 7 | #SBATCH --cpus-per-task=1 8 | #SBATCH --mem=1GB 9 | #SBATCH -d singleton 10 | 11 | cd /mnt/nfs/work1/miyyer/kalpesh/projects/style-embeddings/author-classify/webapp 12 | export FLASK_APP=app.py 13 | echo "Running flask app..." 14 | python -m flask run --port 5002 -------------------------------------------------------------------------------- /fairseq/fairseq/data/offset_tokens_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import BaseWrapperDataset 7 | 8 | 9 | class OffsetTokensDataset(BaseWrapperDataset): 10 | 11 | def __init__(self, dataset, offset): 12 | super().__init__(dataset) 13 | self.offset = offset 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx] + self.offset 17 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/grad_multiply.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import torch 7 | 8 | 9 | class GradMultiply(torch.autograd.Function): 10 | @staticmethod 11 | def forward(ctx, x, scale): 12 | ctx.scale = scale 13 | res = x.new(x) 14 | return res 15 | 16 | @staticmethod 17 | def backward(ctx, grad): 18 | return grad * ctx.scale, None 19 | -------------------------------------------------------------------------------- /datasets/bpe2text.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import tqdm 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--input', type=str, default=None) 8 | parser.add_argument('--output', type=str, default=None) 9 | args = parser.parse_args() 10 | 11 | roberta = torch.hub.load('pytorch/fairseq', 'roberta.base') 12 | 13 | with open(args.input, "r") as f: 14 | data = f.read().strip().split("\n") 15 | 16 | data = [roberta.bpe.decode(x) for x in tqdm.tqdm(data)] 17 | 18 | with open(args.output, "w") as f: 19 | f.write("\n".join(data) + "\n") 20 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/---new-benchmark.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F5A5 New Benchmark" 3 | about: You benchmark a part of this library and would like to share your results 4 | title: "[Benchmark]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Benchmarking Transformers 11 | 12 | ## Benchmark 13 | 14 | Which part of Transformers did you benchmark? 15 | 16 | ## Set-up 17 | 18 | What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use? 19 | 20 | ## Results 21 | 22 | Put your results here! 23 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/strip_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import BaseWrapperDataset 7 | 8 | 9 | class StripTokenDataset(BaseWrapperDataset): 10 | 11 | def __init__(self, dataset, id_to_strip): 12 | super().__init__(dataset) 13 | self.id_to_strip = id_to_strip 14 | 15 | def __getitem__(self, index): 16 | item = self.dataset[index] 17 | return item[item.ne(self.id_to_strip)] 18 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/--new-model-addition.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F31FNew model addition" 3 | about: Submit a proposal/request to implement a new Transformer-based model 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # 🌟New model addition 11 | 12 | ## Model description 13 | 14 | 15 | 16 | ## Open Source status 17 | 18 | * [ ] the model implementation is available: (give details) 19 | * [ ] the model weights are available: (give details) 20 | * [ ] who are the authors: (mention them) 21 | 22 | ## Additional context 23 | 24 | 25 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/unfold.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn.functional as F 7 | 8 | 9 | def unfold1d(x, kernel_size, padding_l, pad_value=0): 10 | '''unfold T x B x C to T x B x C x K''' 11 | if kernel_size > 1: 12 | T, B, C = x.size() 13 | x = F.pad(x, (0, 0, 0, 0, padding_l, kernel_size - 1 - padding_l), value=pad_value) 14 | x = x.as_strided((T, B, C, kernel_size), (B*C, C, 1, B*C)) 15 | else: 16 | x = x.unsqueeze(3) 17 | return x 18 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/src/app.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | animation: App-logo-spin infinite 20s linear; 7 | height: 40vmin; 8 | pointer-events: none; 9 | } 10 | 11 | .App-header { 12 | background-color: #282c34; 13 | min-height: 100vh; 14 | display: flex; 15 | flex-direction: column; 16 | align-items: center; 17 | justify-content: center; 18 | font-size: calc(10px + 2vmin); 19 | color: white; 20 | } 21 | 22 | .App-link { 23 | color: #61dafb; 24 | } 25 | 26 | @keyframes App-logo-spin { 27 | from { 28 | transform: rotate(0deg); 29 | } 30 | to { 31 | transform: rotate(360deg); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/space_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | from fairseq.data.encoders import register_tokenizer 9 | 10 | 11 | @register_tokenizer('space') 12 | class SpaceTokenizer(object): 13 | 14 | def __init__(self, source_lang=None, target_lang=None): 15 | self.space_tok = re.compile(r"\s+") 16 | 17 | def encode(self, x: str) -> str: 18 | return self.space_tok.sub(' ', x) 19 | 20 | def decode(self, x: str) -> str: 21 | return x 22 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/lightconv_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightconv_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightconv_cuda', [ 14 | 'lightconv_cuda.cpp', 15 | 'lightconv_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /transformers/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 
11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /fairseq/fairseq/modules/layer_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | 9 | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): 10 | if not export and torch.cuda.is_available(): 11 | try: 12 | from apex.normalization import FusedLayerNorm 13 | return FusedLayerNorm(normalized_shape, eps, elementwise_affine) 14 | except ImportError: 15 | pass 16 | return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) 17 | -------------------------------------------------------------------------------- /datasets/bpe2binary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | FOLDERNAME=$1 4 | 5 | fairseq-preprocess \ 6 | --only-source \ 7 | --trainpref "${FOLDERNAME}/train.label" \ 8 | --validpref "${FOLDERNAME}/dev.label" \ 9 | --destdir "${FOLDERNAME}-bin/label" \ 10 | --workers 24 11 | 12 | fairseq-preprocess \ 13 | --only-source \ 14 | --trainpref "${FOLDERNAME}/train.input0.bpe" \ 15 | --validpref "${FOLDERNAME}/dev.input0.bpe" \ 16 | --destdir "${FOLDERNAME}-bin/input0" \ 17 | --workers 24 \ 18 | --srcdict $ROBERTA_LARGE/dict.txt 19 | 20 | cp ${FOLDERNAME}-bin/label/dict.txt ${FOLDERNAME}-bin/dict.txt 21 | cp ${FOLDERNAME}-bin/label/dict.txt ${FOLDERNAME}/dict.txt 22 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/raw_label_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . import FairseqDataset 9 | 10 | 11 | class RawLabelDataset(FairseqDataset): 12 | 13 | def __init__(self, labels): 14 | super().__init__() 15 | self.labels = labels 16 | 17 | def __getitem__(self, index): 18 | return self.labels[index] 19 | 20 | def __len__(self): 21 | return len(self.labels) 22 | 23 | def collater(self, samples): 24 | return torch.tensor(samples) 25 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/lru_cache_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from functools import lru_cache 7 | 8 | from . 
import BaseWrapperDataset 9 | 10 | 11 | class LRUCacheDataset(BaseWrapperDataset): 12 | 13 | def __init__(self, dataset, token=None): 14 | super().__init__(dataset) 15 | 16 | @lru_cache(maxsize=8) 17 | def __getitem__(self, index): 18 | return self.dataset[index] 19 | 20 | @lru_cache(maxsize=8) 21 | def collater(self, samples): 22 | return self.dataset.collater(samples) 23 | -------------------------------------------------------------------------------- /fairseq/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = fairseq 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /fairseq/examples/roberta/commonsense_qa/download_cqa_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | OUTDIR=data/CommonsenseQA 8 | 9 | mkdir -p $OUTDIR 10 | 11 | wget -O $OUTDIR/train.jsonl https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl 12 | wget -O $OUTDIR/valid.jsonl https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl 13 | wget -O $OUTDIR/test.jsonl https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl 14 | wget -O $OUTDIR/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt 15 | -------------------------------------------------------------------------------- /fairseq/scripts/sacrebleu_pregen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 4 ]; then 4 | echo "usage: $0 TESTSET SRCLANG TGTLANG GEN" 5 | exit 1 6 | fi 7 | 8 | TESTSET=$1 9 | SRCLANG=$2 10 | TGTLANG=$3 11 | 12 | GEN=$4 13 | 14 | echo 'Cloning Moses github repository (for tokenization scripts)...' 15 | git clone https://github.com/moses-smt/mosesdecoder.git 16 | 17 | SCRIPTS=mosesdecoder/scripts 18 | DETOKENIZER=$SCRIPTS/tokenizer/detokenizer.perl 19 | 20 | grep ^H $GEN \ 21 | | sed 's/^H\-//' \ 22 | | sort -n -k 1 \ 23 | | cut -f 3 \ 24 | | perl $DETOKENIZER -l $TGTLANG \ 25 | | sed "s/ - /-/g" \ 26 | > $GEN.sorted.detok 27 | 28 | sacrebleu --test-set $TESTSET --language-pair "${SRCLANG}-${TGTLANG}" < $GEN.sorted.detok 29 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/sentence_order_label_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from fairseq.data import data_utils 9 | 10 | from . 
import FairseqDataset 11 | 12 | 13 | class SentenceOrderLabelDataset(FairseqDataset): 14 | 15 | def __init__(self, order_labels): 16 | super().__init__() 17 | self.order_labels = [[int(y) for y in x.strip().split(",")] for x in order_labels] 18 | 19 | def __getitem__(self, index): 20 | return self.order_labels[index] 21 | 22 | def __len__(self): 23 | return len(self.order_labels) 24 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/sort_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | 8 | from . import BaseWrapperDataset 9 | 10 | 11 | class SortDataset(BaseWrapperDataset): 12 | 13 | def __init__(self, dataset, sort_order): 14 | super().__init__(dataset) 15 | if not isinstance(sort_order, (list, tuple)): 16 | sort_order = [sort_order] 17 | self.sort_order = sort_order 18 | 19 | assert all(len(so) == len(dataset) for so in sort_order) 20 | 21 | def ordered_indices(self): 22 | return np.lexsort(self.sort_order) 23 | -------------------------------------------------------------------------------- /transformers/.circleci/deploy.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | 3 | function deploy_doc(){ 4 | echo "Creating doc at commit $1 and pushing to folder $2" 5 | git checkout $1 6 | if [ ! -z "$2" ] 7 | then 8 | if [ -d "$dir/$2" ]; then 9 | echo "Directory" $2 "already exists" 10 | else 11 | echo "Pushing version" $2 12 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 13 | fi 14 | else 15 | echo "Pushing master" 16 | make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir 17 | fi 18 | } 19 | 20 | deploy_doc "master" 21 | deploy_doc "b33a385" v1.0.0 22 | deploy_doc "fe02e45" v1.1.0 23 | deploy_doc "89fd345" v1.2.0 24 | deploy_doc "fc9faa8" v2.0.0 25 | deploy_doc "3ddce1d" v2.1.1 26 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/dynamicconv_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
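One detail of SortDataset above that is easy to miss: np.lexsort treats the last key in sort_order as the primary key and earlier keys as tie-breakers, so fairseq typically passes something like (shuffle, sizes) to get size-ordered batching with random tie-breaking. A small self-contained illustration (the arrays are made up for the example):

    import numpy as np

    sizes = np.array([5, 2, 5, 1])            # pretend token counts per example
    shuffle = np.array([0.9, 0.1, 0.3, 0.7])  # random tie-breaker

    # np.lexsort sorts by the last key first, breaking ties with earlier keys.
    order = np.lexsort((shuffle, sizes))
    print(order)         # [3 1 2 0]: sorted by size, ties on size broken by `shuffle`
    print(sizes[order])  # [1 2 5 5]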
6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='dynamicconv_layer', 12 | ext_modules=[ 13 | CUDAExtension( 14 | name='dynamicconv_cuda', 15 | sources=[ 16 | 'dynamicconv_cuda.cpp', 17 | 'dynamicconv_cuda_kernel.cu', 18 | ], 19 | ), 20 | ], 21 | cmdclass={ 22 | 'build_ext': BuildExtension 23 | }) 24 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature Request" 3 | about: Submit a proposal/request for a new PyTorch Transformers feature 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | 14 | ## Motivation 15 | 16 | 17 | 18 | ## Additional context 19 | 20 | 21 | -------------------------------------------------------------------------------- /transformers/.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false -------------------------------------------------------------------------------- /transformers/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.12 2 | Babel==2.7.0 3 | certifi==2019.6.16 4 | chardet==3.0.4 5 | commonmark==0.9.0 6 | docutils==0.14 7 | future==0.17.1 8 | idna==2.8 9 | imagesize==1.1.0 10 | Jinja2==2.10.1 11 | MarkupSafe==1.1.1 12 | packaging==19.0 13 | Pygments==2.4.2 14 | pyparsing==2.4.0 15 | pytz==2019.1 16 | recommonmark==0.5.0 17 | requests==2.22.0 18 | six==1.12.0 19 | snowballstemmer==1.9.0 20 | Sphinx==2.1.2 21 | sphinx-rtd-theme==0.4.3 22 | sphinxcontrib-applehelp==1.0.1 23 | sphinxcontrib-devhelp==1.0.1 24 | sphinxcontrib-htmlhelp==1.0.2 25 | sphinxcontrib-jsmath==1.0.1 26 | sphinxcontrib-qthelp==1.0.2 27 | sphinxcontrib-serializinghtml==1.1.3 28 | urllib3==1.25.3 29 | sphinx-markdown-tables==0.0.9 30 | numpy==1.17.2 31 | tensorflow==2.5.1 32 | torch==1.2.0 -------------------------------------------------------------------------------- /fairseq/fairseq/data/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . 
import BaseWrapperDataset 7 | 8 | 9 | class ListDataset(BaseWrapperDataset): 10 | 11 | def __init__(self, dataset, sizes=None): 12 | super().__init__(dataset) 13 | self._sizes = sizes 14 | 15 | def collater(self, samples): 16 | return samples 17 | 18 | @property 19 | def sizes(self): 20 | return self._sizes 21 | 22 | def num_tokens(self, index): 23 | return self.sizes[index] 24 | 25 | def size(self, index): 26 | return self.sizes[index] 27 | 28 | def set_epoch(self, epoch): 29 | pass 30 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/scripts/flip_labels.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from style_paraphrase.evaluation.similarity.test_sim import find_similarity 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--file1', type=str, default=None) 7 | parser.add_argument('--file2', type=str, default=None) 8 | parser.add_argument('--output_file', type=str, default=None) 9 | args = parser.parse_args() 10 | 11 | with open(args.file1, "r") as f: 12 | data1 = f.read().strip().split("\n") 13 | 14 | with open(args.file2, "r") as f: 15 | data2 = f.read().strip().split("\n") 16 | 17 | label1 = data1[0] 18 | label2 = data2[0] 19 | 20 | data1 = [label2 for _ in data1] 21 | data2 = [label1 for _ in data2] 22 | 23 | concat = data1 + data2 24 | 25 | with open(args.output_file, "w") as f: 26 | f.write("\n".join(concat) + "\n") 27 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/nltk_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq.data.encoders import register_tokenizer 7 | 8 | 9 | @register_tokenizer('nltk') 10 | class NLTKTokenizer(object): 11 | 12 | def __init__(self, source_lang=None, target_lang=None): 13 | try: 14 | from nltk.tokenize import word_tokenize 15 | self.word_tokenize = word_tokenize 16 | except ImportError: 17 | raise ImportError('Please install nltk with: pip install nltk') 18 | 19 | def encode(self, x: str) -> str: 20 | return ' '.join(self.word_tokenize(x)) 21 | 22 | def decode(self, x: str) -> str: 23 | return x 24 | -------------------------------------------------------------------------------- /fairseq/tests/test_iterators.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
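For reference, the NLTKTokenizer above is a thin wrapper over nltk.tokenize.word_tokenize: encode() joins the produced tokens with spaces and decode() is a no-op. A hedged usage sketch follows; it assumes nltk is installed and that the Punkt tokenizer models can be downloaded (recent NLTK releases may name the resource 'punkt_tab' instead of 'punkt').

    import nltk
    from nltk.tokenize import word_tokenize

    # One-time download of the Punkt models (assumption: network access is available;
    # skip if the models are already cached locally).
    nltk.download("punkt", quiet=True)

    sentence = "Shall I compare thee to a summer's day?"
    tokens = word_tokenize(sentence)
    encoded = " ".join(tokens)  # what NLTKTokenizer.encode() would return

    print(tokens)   # e.g. ['Shall', 'I', 'compare', 'thee', 'to', 'a', 'summer', "'s", 'day', '?']
    print(encoded)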
5 | 6 | import unittest 7 | 8 | from fairseq.data import iterators 9 | 10 | 11 | class TestIterators(unittest.TestCase): 12 | 13 | def test_counting_iterator(self): 14 | x = list(range(10)) 15 | itr = iterators.CountingIterator(x) 16 | self.assertTrue(itr.has_next()) 17 | self.assertEqual(next(itr), 0) 18 | self.assertEqual(next(itr), 1) 19 | itr.skip(3) 20 | self.assertEqual(next(itr), 5) 21 | itr.skip(3) 22 | self.assertEqual(next(itr), 9) 23 | self.assertFalse(itr.has_next()) 24 | 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /transformers/docs/source/main_classes/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to: 7 | 8 | - resize the input token embeddings when new tokens are added to the vocabulary 9 | - prune the attention heads of the model. 10 | 11 | ``PreTrainedModel`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.PreTrainedModel 15 | :members: 16 | 17 | ``TFPreTrainedModel`` 18 | ~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autoclass:: transformers.TFPreTrainedModel 21 | :members: 22 | -------------------------------------------------------------------------------- /fairseq/fairseq/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.criterions.fairseq_criterion import FairseqCriterion 11 | 12 | 13 | build_criterion, register_criterion, CRITERION_REGISTRY = registry.setup_registry( 14 | '--criterion', 15 | base_class=FairseqCriterion, 16 | default='cross_entropy', 17 | ) 18 | 19 | 20 | # automatically import any Python files in the criterions/ directory 21 | for file in os.listdir(os.path.dirname(__file__)): 22 | if file.endswith('.py') and not file.startswith('_'): 23 | module = file[:file.find('.py')] 24 | importlib.import_module('fairseq.criterions.' + module) 25 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
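The criterions package above, and the encoders package whose header begins here, both rely on the same two-step plugin pattern: a decorator-based registry created by registry.setup_registry, plus an import loop that loads every module in the package so the decorators actually run. Below is a minimal self-contained sketch of the decorator half; the names mirror the fairseq API, but the registry implementation is simplified for illustration.

    CRITERION_REGISTRY = {}


    def register_criterion(name):
        """Decorator that records a class in the registry under `name`."""
        def wrapper(cls):
            if name in CRITERION_REGISTRY:
                raise ValueError(f"criterion {name!r} already registered")
            CRITERION_REGISTRY[name] = cls
            return cls
        return wrapper


    @register_criterion("cross_entropy")
    class CrossEntropyCriterion:
        def __call__(self, model, batch):
            ...


    def build_criterion(name, *args, **kwargs):
        return CRITERION_REGISTRY[name](*args, **kwargs)


    criterion = build_criterion("cross_entropy")
    print(type(criterion).__name__)  # CrossEntropyCriterion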
5 | 6 | 7 | import importlib 8 | import os 9 | 10 | from fairseq import registry 11 | 12 | 13 | build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry( 14 | '--tokenizer', 15 | default=None, 16 | ) 17 | 18 | 19 | build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry( 20 | '--bpe', 21 | default=None, 22 | ) 23 | 24 | 25 | # automatically import any Python files in the encoders/ directory 26 | for file in os.listdir(os.path.dirname(__file__)): 27 | if file.endswith('.py') and not file.startswith('_'): 28 | module = file[:file.find('.py')] 29 | importlib.import_module('fairseq.data.encoders.' + module) 30 | -------------------------------------------------------------------------------- /fairseq/hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import functools 7 | 8 | from fairseq.hub_utils import BPEHubInterface as bpe # noqa 9 | from fairseq.hub_utils import TokenizerHubInterface as tokenizer # noqa 10 | from fairseq.models import MODEL_REGISTRY 11 | 12 | 13 | dependencies = [ 14 | 'numpy', 15 | 'regex', 16 | 'requests', 17 | 'torch', 18 | ] 19 | 20 | 21 | for _model_type, _cls in MODEL_REGISTRY.items(): 22 | for model_name in _cls.hub_models().keys(): 23 | globals()[model_name] = functools.partial( 24 | _cls.from_pretrained, 25 | model_name, 26 | ) 27 | # to simplify the interface we only expose named models 28 | # globals()[_model_type] = _cls.from_pretrained 29 | -------------------------------------------------------------------------------- /fairseq/docs/criterions.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | .. _Criterions: 5 | 6 | Criterions 7 | ========== 8 | 9 | Criterions compute the loss function given the model and batch, roughly:: 10 | 11 | loss = criterion(model, batch) 12 | 13 | .. automodule:: fairseq.criterions 14 | :members: 15 | 16 | .. autoclass:: fairseq.criterions.FairseqCriterion 17 | :members: 18 | :undoc-members: 19 | 20 | .. autoclass:: fairseq.criterions.adaptive_loss.AdaptiveLoss 21 | :members: 22 | :undoc-members: 23 | .. autoclass:: fairseq.criterions.composite_loss.CompositeLoss 24 | :members: 25 | :undoc-members: 26 | .. autoclass:: fairseq.criterions.cross_entropy.CrossEntropyCriterion 27 | :members: 28 | :undoc-members: 29 | .. autoclass:: fairseq.criterions.label_smoothed_cross_entropy.LabelSmoothedCrossEntropyCriterion 30 | :members: 31 | :undoc-members: 32 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
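hubconf.py above exposes each pretrained model as a module-level callable by writing functools.partial objects into globals(), which is roughly what lets torch.hub.load('pytorch/fairseq', 'roberta.base') (used by the dataset scripts earlier) resolve to a from_pretrained call on the corresponding model class. A self-contained sketch of that trick with a stand-in class; the class and entry names below are invented for illustration.

    import functools


    class FakeModel:
        """Stand-in for a model class with a from_pretrained constructor."""

        def __init__(self, name):
            self.name = name

        @classmethod
        def from_pretrained(cls, name, **kwargs):
            return cls(name)


    # Mirror the hubconf.py pattern: one ready-to-call entry point per model name.
    for model_name in ["demo.base", "demo.large"]:
        globals()[model_name] = functools.partial(FakeModel.from_pretrained, model_name)

    # torch.hub finds such entries via getattr() on the hubconf module; locally we
    # can exercise the same lookup by key.
    model = globals()["demo.base"]()
    print(model.name)  # demo.base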
5 | """ 6 | See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin Gimpel with 7 | the corresponding GitHub repo: https://github.com/hendrycks/GELUs 8 | """ 9 | 10 | import math 11 | 12 | import torch 13 | 14 | 15 | def gelu_accurate(x): 16 | if not hasattr(gelu_accurate, "_a"): 17 | gelu_accurate._a = math.sqrt(2 / math.pi) 18 | return 0.5 * x * (1 + torch.tanh(gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3)))) 19 | 20 | 21 | def gelu(x: torch.Tensor) -> torch.Tensor: 22 | if hasattr(torch.nn.functional, 'gelu'): 23 | return torch.nn.functional.gelu(x.float()).type_as(x) 24 | else: 25 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 26 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/lr_scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.optim.lr_scheduler.fairseq_lr_scheduler import FairseqLRScheduler 11 | 12 | 13 | build_lr_scheduler, register_lr_scheduler, LR_SCHEDULER_REGISTRY = registry.setup_registry( 14 | '--lr-scheduler', 15 | base_class=FairseqLRScheduler, 16 | default='fixed', 17 | ) 18 | 19 | # automatically import any Python files in the optim/lr_scheduler/ directory 20 | for file in os.listdir(os.path.dirname(__file__)): 21 | if file.endswith('.py') and not file.startswith('_'): 22 | module = file[:file.find('.py')] 23 | importlib.import_module('fairseq.optim.lr_scheduler.' + module) 24 | -------------------------------------------------------------------------------- /datasets/dataset2bpe.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import tqdm 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--dataset', type=str, default=None) 8 | args = parser.parse_args() 9 | 10 | roberta = torch.hub.load('pytorch/fairseq', 'roberta.base') 11 | 12 | for split in ["train", "dev", "test"]: 13 | data_path = os.path.join(args.dataset, split) + ".txt" 14 | label_path = os.path.join(args.dataset, split) + ".label" 15 | 16 | with open(data_path, "r") as f: 17 | data = f.read().strip().split("\n") 18 | 19 | with open(label_path, "r") as f: 20 | labels = f.read().strip().split("\n") 21 | 22 | assert len(data) == len(labels) 23 | 24 | data = [roberta.bpe.encode(x) for x in tqdm.tqdm(data)] 25 | 26 | output_path = os.path.join(args.dataset, split) + ".input0.bpe" 27 | 28 | with open(output_path, "w") as f: 29 | f.write("\n".join(data) + "\n") 30 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/numel_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import BaseWrapperDataset 10 | 11 | 12 | class NumelDataset(BaseWrapperDataset): 13 | 14 | def __init__(self, dataset, reduce=False): 15 | super().__init__(dataset) 16 | self.reduce = reduce 17 | 18 | def __getitem__(self, index): 19 | item = self.dataset[index] 20 | if torch.is_tensor(item): 21 | return torch.numel(item) 22 | else: 23 | return np.size(item) 24 | 25 | def __len__(self): 26 | return len(self.dataset) 27 | 28 | def collater(self, samples): 29 | if self.reduce: 30 | return sum(samples) 31 | else: 32 | return torch.tensor(samples) 33 | -------------------------------------------------------------------------------- /fairseq/scripts/convert_dictionary.lua: -------------------------------------------------------------------------------- 1 | -- Copyright (c) Facebook, Inc. and its affiliates. 2 | -- 3 | -- This source code is licensed under the MIT license found in the 4 | -- LICENSE file in the root directory of this source tree. 5 | -- 6 | -- Usage: convert_dictionary.lua 7 | require 'fairseq' 8 | require 'torch' 9 | require 'paths' 10 | 11 | if #arg < 1 then 12 | print('usage: convert_dictionary.lua ') 13 | os.exit(1) 14 | end 15 | if not paths.filep(arg[1]) then 16 | print('error: file does not exit: ' .. arg[1]) 17 | os.exit(1) 18 | end 19 | 20 | dict = torch.load(arg[1]) 21 | dst = paths.basename(arg[1]):gsub('.th7', '.txt') 22 | assert(dst:match('.txt$')) 23 | 24 | f = io.open(dst, 'w') 25 | for idx, symbol in ipairs(dict.index_to_symbol) do 26 | if idx > dict.cutoff then 27 | break 28 | end 29 | f:write(symbol) 30 | f:write(' ') 31 | f:write(dict.index_to_freq[idx]) 32 | f:write('\n') 33 | end 34 | f:close() 35 | -------------------------------------------------------------------------------- /transformers/docs/source/main_classes/tokenizer.rst: -------------------------------------------------------------------------------- 1 | Tokenizer 2 | ---------------------------------------------------- 3 | 4 | The base class ``PreTrainedTokenizer`` implements the common methods for loading/saving a tokenizer either from a local file or directory, or from a pretrained tokenizer provided by the library (downloaded from HuggingFace's AWS S3 repository). 5 | 6 | ``PreTrainedTokenizer`` is the main entry point into tokenizers as it also implements the main methods for using all the tokenizers: 7 | 8 | - tokenizing, converting tokens to ids and back and encoding/decoding, 9 | - adding new tokens to the vocabulary in a way that is independant of the underlying structure (BPE, SentencePiece...), 10 | - managing special tokens (adding them, assigning them to roles, making sure they are not split during tokenization) 11 | 12 | ``PreTrainedTokenizer`` 13 | ~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.PreTrainedTokenizer 16 | :members: 17 | -------------------------------------------------------------------------------- /fairseq/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=fairseq 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. 
Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/colorize_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . import BaseWrapperDataset 9 | 10 | 11 | class ColorizeDataset(BaseWrapperDataset): 12 | """ Adds 'colors' property to net input that is obtained from the provided color getter for use by models """ 13 | def __init__(self, dataset, color_getter): 14 | super().__init__(dataset) 15 | self.color_getter = color_getter 16 | 17 | def collater(self, samples): 18 | base_collate = super().collater(samples) 19 | if len(base_collate) > 0: 20 | base_collate["net_input"]["colors"] = torch.tensor( 21 | list(self.color_getter(self.dataset, s["id"]) for s in samples), 22 | dtype=torch.long, 23 | ) 24 | return base_collate 25 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/logsumexp_moe.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | 9 | class LogSumExpMoE(torch.autograd.Function): 10 | """Standard LogSumExp forward pass, but use *posterior* for the backward. 11 | 12 | See `"Mixture Models for Diverse Machine Translation: Tricks of the Trade" 13 | (Shen et al., 2019) `_. 14 | """ 15 | 16 | @staticmethod 17 | def forward(ctx, logp, posterior, dim=-1): 18 | ctx.save_for_backward(posterior) 19 | ctx.dim = dim 20 | return torch.logsumexp(logp, dim=dim) 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | posterior, = ctx.saved_tensors 25 | grad_logp = grad_output.unsqueeze(ctx.dim) * posterior 26 | return grad_logp, None, None 27 | -------------------------------------------------------------------------------- /fairseq/fairseq/clib/libbleu/module.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | 12 | static PyMethodDef method_def[] = { 13 | {NULL, NULL, 0, NULL} 14 | }; 15 | 16 | static struct PyModuleDef module_def = { 17 | PyModuleDef_HEAD_INIT, 18 | "libbleu", /* name of module */ 19 | NULL, /* module documentation, may be NULL */ 20 | -1, /* size of per-interpreter state of the module, 21 | or -1 if the module keeps state in global variables. 
*/ 22 | method_def 23 | }; 24 | 25 | 26 | #if PY_MAJOR_VERSION == 2 27 | PyMODINIT_FUNC init_libbleu() 28 | #else 29 | PyMODINIT_FUNC PyInit_libbleu() 30 | #endif 31 | { 32 | PyObject *m = PyModule_Create(&module_def); 33 | if (!m) { 34 | return NULL; 35 | } 36 | return m; 37 | } 38 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_single_nucleus_paraphrase/full_disagreement.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | I am sure you will both enjoy it .,Well I bet you both of you are happy . 3 | He just needs to R.E.S.P.E.C.T ... me & my pplz ... lol,"He just stopped getting back to us , me and my friends . . . me and my boyfriend . . ." 4 | "However , I 'm an unadulterated veggie and I 'm enamored of them .",But anyway I 'm veggie and I 'm enamored of her ! 5 | "I stopped liking rap , there are just too many people .","i don 't like rap anymore , it 's too many people liking it ." 6 | I don 't really know bout the kids though .,But I really don 't know the kids that are saying that . 7 | dont let anyone decide the fate but you .,You should not be forced to decide who will be in the fate of the world . 8 | is ther a goos tool for mixing songs ?,Is there a mixing tool to mix songs ? 9 | The Rock for hot steamy sex !,Hot steamy sex Rock by The Rombtones ! 10 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/pad_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq.data import data_utils 7 | 8 | from . import BaseWrapperDataset 9 | 10 | 11 | class PadDataset(BaseWrapperDataset): 12 | 13 | def __init__(self, dataset, pad_idx, left_pad): 14 | super().__init__(dataset) 15 | self.pad_idx = pad_idx 16 | self.left_pad = left_pad 17 | 18 | def collater(self, samples): 19 | return data_utils.collate_tokens(samples, self.pad_idx, left_pad=self.left_pad) 20 | 21 | 22 | class LeftPadDataset(PadDataset): 23 | 24 | def __init__(self, dataset, pad_idx): 25 | super().__init__(dataset, pad_idx, left_pad=True) 26 | 27 | 28 | class RightPadDataset(PadDataset): 29 | 30 | def __init__(self, dataset, pad_idx): 31 | super().__init__(dataset, pad_idx, left_pad=False) 32 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/transformerxl.rst: -------------------------------------------------------------------------------- 1 | Transformer XL 2 | ---------------------------------------------------- 3 | 4 | 5 | ``TransfoXLConfig`` 6 | ~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | .. autoclass:: transformers.TransfoXLConfig 9 | :members: 10 | 11 | 12 | ``TransfoXLTokenizer`` 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: transformers.TransfoXLTokenizer 16 | :members: 17 | 18 | 19 | ``TransfoXLModel`` 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | .. autoclass:: transformers.TransfoXLModel 23 | :members: 24 | 25 | 26 | ``TransfoXLLMHeadModel`` 27 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 | 29 | .. autoclass:: transformers.TransfoXLLMHeadModel 30 | :members: 31 | 32 | 33 | ``TFTransfoXLModel`` 34 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 35 | 36 | .. 
autoclass:: transformers.TFTransfoXLModel 37 | :members: 38 | 39 | 40 | ``TFTransfoXLLMHeadModel`` 41 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 42 | 43 | .. autoclass:: transformers.TFTransfoXLLMHeadModel 44 | :members: 45 | -------------------------------------------------------------------------------- /transformers/transformers/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # content of conftest.py 2 | 3 | import pytest 4 | 5 | 6 | def pytest_addoption(parser): 7 | parser.addoption( 8 | "--runslow", action="store_true", default=False, help="run slow tests" 9 | ) 10 | parser.addoption( 11 | "--use_cuda", action="store_true", default=False, help="run tests on gpu" 12 | ) 13 | 14 | 15 | def pytest_configure(config): 16 | config.addinivalue_line("markers", "slow: mark test as slow to run") 17 | 18 | 19 | def pytest_collection_modifyitems(config, items): 20 | if config.getoption("--runslow"): 21 | # --runslow given in cli: do not skip slow tests 22 | return 23 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 24 | for item in items: 25 | if "slow" in item.keywords: 26 | item.add_marker(skip_slow) 27 | 28 | @pytest.fixture 29 | def use_cuda(request): 30 | """ Run test on gpu """ 31 | return request.config.getoption("--use_cuda") 32 | -------------------------------------------------------------------------------- /fairseq/examples/language_model/prepare-wikitext-103.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh 3 | 4 | URLS=( 5 | "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip" 6 | ) 7 | FILES=( 8 | "wikitext-103-v1.zip" 9 | ) 10 | 11 | for ((i=0;i<${#URLS[@]};++i)); do 12 | file=${FILES[i]} 13 | if [ -f $file ]; then 14 | echo "$file already exists, skipping download" 15 | else 16 | url=${URLS[i]} 17 | wget "$url" 18 | if [ -f $file ]; then 19 | echo "$url successfully downloaded." 20 | else 21 | echo "$url not successfully downloaded." 22 | exit -1 23 | fi 24 | if [ ${file: -4} == ".tgz" ]; then 25 | tar zxvf $file 26 | elif [ ${file: -4} == ".tar" ]; then 27 | tar xvf $file 28 | elif [ ${file: -4} == ".zip" ]; then 29 | unzip $file 30 | fi 31 | fi 32 | done 33 | cd .. 34 | -------------------------------------------------------------------------------- /fairseq/docs/optim.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | .. _optimizers: 5 | 6 | Optimizers 7 | ========== 8 | 9 | Optimizers update the Model parameters based on the gradients. 10 | 11 | .. automodule:: fairseq.optim 12 | :members: 13 | 14 | .. autoclass:: fairseq.optim.FairseqOptimizer 15 | :members: 16 | :undoc-members: 17 | 18 | .. autoclass:: fairseq.optim.adadelta.Adadelta 19 | :members: 20 | :undoc-members: 21 | .. autoclass:: fairseq.optim.adagrad.Adagrad 22 | :members: 23 | :undoc-members: 24 | .. autoclass:: fairseq.optim.adafactor.FairseqAdafactor 25 | :members: 26 | :undoc-members: 27 | .. autoclass:: fairseq.optim.adam.FairseqAdam 28 | :members: 29 | :undoc-members: 30 | .. autoclass:: fairseq.optim.fp16_optimizer.FP16Optimizer 31 | :members: 32 | :undoc-members: 33 | .. autoclass:: fairseq.optim.nag.FairseqNAG 34 | :members: 35 | :undoc-members: 36 | .. 
autoclass:: fairseq.optim.sgd.SGD 37 | :members: 38 | :undoc-members: 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | style-venv 53 | 54 | .vscode 55 | runs/ 56 | datasets/ 57 | 58 | outputs/formality_backup 59 | outputs/baselines/dlsm_formality 60 | outputs/baselines/unmt_formality 61 | outputs/baselines/transform_delete_generate_formality 62 | 63 | input.tsv 64 | requirements-old.txt 65 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/truncate_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | 8 | from . import BaseWrapperDataset 9 | 10 | 11 | class TruncateDataset(BaseWrapperDataset): 12 | 13 | def __init__(self, dataset, truncation_length): 14 | super().__init__(dataset) 15 | assert truncation_length is not None 16 | self.truncation_length = truncation_length 17 | self.dataset = dataset 18 | 19 | def __getitem__(self, index): 20 | item = self.dataset[index] 21 | item_len = item.size(0) 22 | if item_len > self.truncation_length: 23 | item = item[:self.truncation_length] 24 | return item 25 | 26 | @property 27 | def sizes(self): 28 | return np.minimum(self.dataset.sizes, self.truncation_length) 29 | 30 | def __len__(self): 31 | return len(self.dataset) 32 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/auto.rst: -------------------------------------------------------------------------------- 1 | AutoModels 2 | ----------- 3 | 4 | In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you are supplying to the ``from_pretrained`` method. 5 | 6 | AutoClasses are here to do this job for you so that you automatically retreive the relevant model given the name/path to the pretrained weights/config/vocabulary: 7 | 8 | Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create a class of the relevant architecture (ex: ``model = AutoModel.from_pretrained('bert-base-cased')`` will create a instance of ``BertModel``). 9 | 10 | 11 | ``AutoConfig`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.AutoConfig 15 | :members: 16 | 17 | 18 | ``AutoModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.AutoModel 22 | :members: 23 | 24 | 25 | ``AutoTokenizer`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. 
autoclass:: transformers.AutoTokenizer 29 | :members: 30 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/dynamicconv_layer/dynamiconv_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | std::vector dynamicconv_cpu_forward( 5 | float* input, 6 | float* filters, 7 | int padding_l); 8 | 9 | std::vector dynamicconv_cpu_backward( 10 | float* gradOutput, 11 | int padding_l, 12 | float* input, 13 | float* filters); 14 | 15 | std::vector dynamicconv_forward( 16 | float* input, 17 | float* filters, 18 | int padding_l) { 19 | 20 | return dynamicconv_cpu_forward(input, filters, padding_l); 21 | } 22 | 23 | std::vector dynamicconv_backward( 24 | float* gradOutput, 25 | int padding_l, 26 | float* input, 27 | float* filters) { 28 | 29 | return dynamicconv_cpu_backward(gradOutput, padding_l, input, filters); 30 | } 31 | 32 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 33 | m.def("forward", &dynamicconv_forward, "dynamicconv forward (CPU)"); 34 | m.def("backward", &dynamicconv_backward, "dynamicconv backward (CPU)"); 35 | } 36 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/scalar_bias.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | # 6 | 7 | import torch 8 | 9 | 10 | class ScalarBias(torch.autograd.Function): 11 | """ 12 | Adds a vector of scalars, used in self-attention mechanism to allow 13 | the model to optionally attend to this vector instead of the past 14 | """ 15 | 16 | @staticmethod 17 | def forward(ctx, input, dim, bias_init): 18 | size = list(input.size()) 19 | size[dim] += 1 20 | output = input.new(*size).fill_(bias_init) 21 | output.narrow(dim, 1, size[dim] - 1).copy_(input) 22 | ctx.dim = dim 23 | return output 24 | 25 | @staticmethod 26 | def backward(ctx, grad): 27 | return grad.narrow(ctx.dim, 1, grad.size(ctx.dim) - 1), None, None 28 | 29 | 30 | def scalar_bias(input, dim, bias_init=0): 31 | return ScalarBias.apply(input, dim, bias_init) 32 | -------------------------------------------------------------------------------- /transformers/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.8.0 2 | astor==0.8.0 3 | atomicwrites==1.3.0 4 | attrs==19.2.0 5 | boto3==1.9.243 6 | botocore==1.12.243 7 | certifi==2019.9.11 8 | chardet==3.0.4 9 | Click==7.0 10 | docutils==0.15.2 11 | gast==0.2.2 12 | google-pasta==0.1.7 13 | grpcio==1.24.1 14 | h5py==2.10.0 15 | idna==2.8 16 | importlib-metadata==0.23 17 | jmespath==0.9.4 18 | joblib==0.14.0 19 | Keras-Applications==1.0.8 20 | Keras-Preprocessing==1.1.0 21 | Markdown==3.1.1 22 | more-itertools==7.2.0 23 | numpy==1.17.2 24 | opt-einsum==3.1.0 25 | packaging==19.2 26 | pluggy==0.13.0 27 | protobuf==3.10.0 28 | py==1.8.0 29 | pyparsing==2.4.2 30 | pytest==5.2.1 31 | python-dateutil==2.8.0 32 | regex==2019.8.19 33 | requests==2.22.0 34 | s3transfer==0.2.1 35 | sacremoses==0.0.35 36 | sentencepiece==0.1.83 37 | six==1.12.0 38 | tensorboard==2.0.0 39 | tensorflow==2.5.1 40 | tensorflow-estimator==2.0.0 41 | termcolor==1.1.0 42 | torch==1.2.0 43 | tqdm==4.36.1 44 | urllib3==1.25.6 45 | wcwidth==0.1.7 46 | Werkzeug==0.16.0 47 | wrapt==1.11.2 48 | zipp==0.6.0 49 | 
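To make the ScalarBias behaviour above concrete: it grows the chosen dimension by one slot filled with bias_init and shifts the original values up by one position, and its backward simply drops the gradient of that extra slot. A short usage sketch follows; it assumes the fairseq fork in this repository is installed (e.g. pip install -e fairseq) so the module is importable, and the tensor shape is an arbitrary toy example.

    import torch

    # Assumption: the vendored fairseq package is importable.
    from fairseq.modules.scalar_bias import scalar_bias

    attn_scores = torch.zeros(2, 3)        # (batch, time) toy attention logits
    out = scalar_bias(attn_scores, dim=1)  # adds one sentinel slot at position 0

    print(out.shape)  # torch.Size([2, 4])
    print(out[0])     # tensor([0., 0., 0., 0.]): the prepended slot holds bias_init (default 0)

    out2 = scalar_bias(attn_scores, dim=1, bias_init=5.0)
    print(out2[0])    # tensor([5., 0., 0., 0.])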
-------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_baseline_he_2020/label_all_117_150.csv: -------------------------------------------------------------------------------- 1 | incorrect,modern,original 2 | incorrect,original,modern 3 | correct,original,original 4 | correct,original,original 5 | incorrect,original,modern 6 | incorrect,original,modern 7 | correct,original,original 8 | correct,original,original 9 | incorrect,original,modern 10 | incorrect,original,modern 11 | correct,original,original 12 | incorrect,original,modern 13 | incorrect,original,modern 14 | correct,original,original 15 | incorrect,modern,original 16 | incorrect,original,modern 17 | correct,modern,modern 18 | correct,modern,modern 19 | incorrect,original,modern 20 | correct,modern,modern 21 | incorrect,modern,original 22 | correct,original,original 23 | correct,modern,modern 24 | incorrect,original,modern 25 | incorrect,original,modern 26 | incorrect,original,modern 27 | correct,modern,modern 28 | incorrect,modern,original 29 | incorrect,modern,original 30 | correct,original,original 31 | incorrect,original,modern 32 | incorrect,original,modern 33 | incorrect,original,modern 34 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/prepend_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from . import BaseWrapperDataset 10 | 11 | 12 | class PrependDataset(BaseWrapperDataset): 13 | def __init__(self, dataset, prepend_getter, ensure_first_token_is=None): 14 | super().__init__(dataset) 15 | self.prepend_getter = prepend_getter 16 | self.ensure_first_token = ensure_first_token_is 17 | 18 | def __getitem__(self, idx): 19 | item = self.dataset[idx] 20 | is_tuple = isinstance(item, tuple) 21 | src = item[0] if is_tuple else item 22 | 23 | assert self.ensure_first_token is None or src[0] == self.ensure_first_token 24 | prepend_idx = self.prepend_getter(self.dataset, idx) 25 | assert isinstance(prepend_idx, int) 26 | src[0] = prepend_idx 27 | item = tuple((src,) + item[1:]) if is_tuple else src 28 | return item 29 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
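Note that, despite its name, PrependDataset above does not make the sample longer: it asserts that the existing first token (typically the BOS symbol) matches ensure_first_token_is and then overwrites it with whatever index prepend_getter returns. A small self-contained illustration of that semantics with toy tensors; the token indices are invented.

    import torch

    BOS, STYLE_A = 0, 7  # invented vocabulary indices for illustration

    samples = [torch.tensor([BOS, 11, 12, 13]), torch.tensor([BOS, 21, 22])]

    def prepend_getter(dataset, idx):
        # In fairseq this would look up e.g. a language or style id for sample `idx`.
        return STYLE_A

    # What PrependDataset.__getitem__ effectively does to each sample:
    out = []
    for idx, item in enumerate(samples):
        assert item[0] == BOS    # the ensure_first_token_is check
        item = item.clone()      # (the real class writes into the item it fetched)
        item[0] = prepend_getter(samples, idx)
        out.append(item)

    print(out[0])  # tensor([ 7, 11, 12, 13]): BOS replaced, length unchanged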
5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.optim.fairseq_optimizer import FairseqOptimizer 11 | from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer 12 | from fairseq.optim.bmuf import FairseqBMUF # noqa 13 | 14 | 15 | __all__ = [ 16 | 'FairseqOptimizer', 17 | 'FP16Optimizer', 18 | 'MemoryEfficientFP16Optimizer', 19 | ] 20 | 21 | 22 | build_optimizer, register_optimizer, OPTIMIZER_REGISTRY = registry.setup_registry( 23 | '--optimizer', 24 | base_class=FairseqOptimizer, 25 | default='nag', 26 | ) 27 | 28 | 29 | # automatically import any Python files in the optim/ directory 30 | for file in os.listdir(os.path.dirname(__file__)): 31 | if file.endswith('.py') and not file.startswith('_'): 32 | module = file[:file.find('.py')] 33 | importlib.import_module('fairseq.optim.' + module) 34 | -------------------------------------------------------------------------------- /style_paraphrase/examples/test_custom_0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_custom_0 3 | #SBATCH -o style_paraphrase/logs/log_custom_0.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | export DATA_DIR=datasets/test_custom 12 | 13 | BASE_DIR=style_paraphrase 14 | 15 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 16 | --output_dir=$BASE_DIR/saved_models/model_custom_0 \ 17 | --model_type=gpt2 \ 18 | --model_name_or_path=gpt2-large \ 19 | --do_train \ 20 | --data_dir=$DATA_DIR \ 21 | --save_steps 500 \ 22 | --logging_steps 20 \ 23 | --save_total_limit -1 \ 24 | --evaluate_during_training \ 25 | --num_train_epochs 3 \ 26 | --gradient_accumulation_steps 2 \ 27 | --per_gpu_train_batch_size 5 \ 28 | --job_id custom_0 \ 29 | --learning_rate 5e-5 \ 30 | --prefix_input_type paraphrase_250 \ 31 | --global_dense_feature_list none \ 32 | --specific_style_train 0 \ 33 | --optimizer adam 34 | 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kalpesh Krishna 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
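The optim package above uses the same auto-discovery idiom as the criterions and lr_scheduler packages: list the package directory and import every .py module so that each @register_* decorator fires as a side effect. A minimal self-contained sketch of that idiom applied to an arbitrary package; the path and package name in the commented call are placeholders, and in fairseq the loop runs inside the package's own __init__.py using os.path.dirname(__file__).

    import importlib
    import os


    def auto_import_package(package_dir, package_name):
        """Import every top-level, non-underscore module found in `package_dir`."""
        for filename in sorted(os.listdir(package_dir)):
            if filename.endswith(".py") and not filename.startswith("_"):
                module_name = filename[: -len(".py")]
                importlib.import_module(package_name + "." + module_name)


    # Placeholder example: importing fairseq/optim this way would register every
    # optimizer defined there via its @register_optimizer decorator.
    # auto_import_package("fairseq/optim", "fairseq.optim")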
22 | -------------------------------------------------------------------------------- /web-demo/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Kalpesh Krishna 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "squash-demo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "bootstrap": "^4.3.1", 7 | "google-analytics-js": "^0.1.2", 8 | "jquery": "^3.4.1", 9 | "react": "^16.8.6", 10 | "react-bootstrap-slider": "^2.2.2", 11 | "react-compound-slider": "^2.0.0", 12 | "react-dom": "^16.8.6", 13 | "react-ga": "^2.6.0", 14 | "react-helmet": "^5.2.1", 15 | "react-scripts": "3.0.1", 16 | "react-switch": "^5.0.1", 17 | "react-toggle": "^4.0.2", 18 | "reactstrap": "^8.0.0", 19 | "serve": "^11.1.0" 20 | }, 21 | "scripts": { 22 | "start": "react-scripts start", 23 | "build": "react-scripts build", 24 | "test": "react-scripts test", 25 | "eject": "react-scripts eject" 26 | }, 27 | "eslintConfig": { 28 | "extends": "react-app" 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /fairseq/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /style_paraphrase/examples/run_finetune_inverse_paraphrase.sh: -------------------------------------------------------------------------------- 1 | export DATA_DIR=formality/formality_prior_detokenize 2 | rm -rf style_paraphrase/saved_models/test_inverse_paraphrase 3 | 4 | python -m torch.distributed.launch --nproc_per_node=1 style_paraphrase/run_lm_finetuning_dynamic.py \ 5 | --output_dir=style_paraphrase/saved_models/test_dynamic_roberta \ 6 | --model_type=gpt2 \ 7 | --model_name_or_path=gpt2-medium \ 8 | --data_dir=$DATA_DIR \ 9 | --do_eval \ 10 | --extra_embedding_dim=768 \ 11 | --save_steps 1000 \ 12 | --logging_steps 1 \ 13 | --save_total_limit 5 \ 14 | --evaluate_during_training \ 15 | --num_train_epochs 1 \ 16 | --gradient_accumulation_steps 8 \ 17 | --per_gpu_train_batch_size 1 \ 18 | --per_gpu_eval_batch_size 10 \ 19 | --roberta_ckpt_file model.pt \ 20 | --extra_embedding_dim 768 \ 21 | --do_train \ 22 | --switch_type constant \ 23 | --learning_rate 5e-5 \ 24 | --prefix_input_type "paraphrase_250" \ 25 | --context_noise "none" \ 26 | --global_dense_feature_list "none" \ 27 | --eval_all_checkpoints \ 28 | --limit_examples 300 \ 29 | --specific_style_train 0 \ 30 | --optimizer adam 31 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/gpt2.rst: -------------------------------------------------------------------------------- 1 | OpenAI GPT2 2 | ---------------------------------------------------- 3 | 4 | ``GPT2Config`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.GPT2Config 8 | :members: 9 | 10 | 11 | ``GPT2Tokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.GPT2Tokenizer 15 | :members: 16 | 17 | 18 | ``GPT2Model`` 19 | ~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.GPT2Model 22 | :members: 23 | 24 | 25 | ``GPT2LMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.GPT2LMHeadModel 29 | :members: 30 | 31 | 32 | ``GPT2DoubleHeadsModel`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.GPT2DoubleHeadsModel 36 | :members: 37 | 38 | 39 | ``TFGPT2Model`` 40 | ~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFGPT2Model 43 | :members: 44 | 45 | 46 | ``TFGPT2LMHeadModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFGPT2LMHeadModel 50 | :members: 51 | 52 | 53 | ``TFGPT2DoubleHeadsModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. 
autoclass:: transformers.TFGPT2DoubleHeadsModel 57 | :members: 58 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/scripts/eval_shakespeare_baselines.sh: -------------------------------------------------------------------------------- 1 | # export CUDA_VISIBLE_DEVICES=0 2 | 3 | split="test" 4 | path=$1/transfer_entire_${split}.txt 5 | 6 | printf "\nRoBERTa ${split} classification\n\n" 7 | python style_paraphrase/evaluation/scripts/roberta_classify.py --input_file $path --label_file $1/all_test_transfer_labels.txt --model_dir style_paraphrase/evaluation/accuracy/shakespeare_classifier --model_data_dir style_paraphrase/evaluation/accuracy/shakespeare_classifier/shakespeare-data-bin 8 | 9 | printf "\nRoBERTa acceptability classification\n\n" 10 | python style_paraphrase/evaluation/scripts/acceptability.py --input_file $path 11 | 12 | printf "\nParaphrase scores --- generated vs gold..\n\n" 13 | python style_paraphrase/evaluation/scripts/get_paraphrase_similarity.py --generated_path ${path} --reference_strs reference --reference_paths ${1}/all_${split}_gold.txt --output_path ${1}/generated_vs_gold.txt --store_scores 14 | 15 | printf "\nnormalized paraphrase score vs gold..\n\n" 16 | python style_paraphrase/evaluation/scripts/micro_eval.py --classifier_file ${path}.roberta_labels --paraphrase_file ${path}.pp_scores --generated_file ${path} --acceptability_file ${path}.acceptability_labels 17 | -------------------------------------------------------------------------------- /fairseq/docs/lr_scheduler.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | .. _Learning Rate Schedulers: 5 | 6 | Learning Rate Schedulers 7 | ======================== 8 | 9 | Learning Rate Schedulers update the learning rate over the course of training. 10 | Learning rates can be updated after each update via :func:`step_update` or at 11 | epoch boundaries via :func:`step`. 12 | 13 | .. automodule:: fairseq.optim.lr_scheduler 14 | :members: 15 | 16 | .. autoclass:: fairseq.optim.lr_scheduler.FairseqLRScheduler 17 | :members: 18 | :undoc-members: 19 | 20 | .. autoclass:: fairseq.optim.lr_scheduler.cosine_lr_scheduler.CosineSchedule 21 | :members: 22 | :undoc-members: 23 | .. autoclass:: fairseq.optim.lr_scheduler.fixed_schedule.FixedSchedule 24 | :members: 25 | :undoc-members: 26 | .. autoclass:: fairseq.optim.lr_scheduler.inverse_square_root_schedule.InverseSquareRootSchedule 27 | :members: 28 | :undoc-members: 29 | .. autoclass:: fairseq.optim.lr_scheduler.reduce_lr_on_plateau.ReduceLROnPlateau 30 | :members: 31 | :undoc-members: 32 | .. autoclass:: fairseq.optim.lr_scheduler.triangular_lr_scheduler.TriangularSchedule 33 | :members: 34 | :undoc-members: 35 | -------------------------------------------------------------------------------- /fairseq/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. fairseq documentation master file, created by 2 | sphinx-quickstart on Fri Aug 17 21:45:30 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | :github_url: https://github.com/pytorch/fairseq 7 | 8 | 9 | fairseq documentation 10 | ===================== 11 | 12 | Fairseq is a sequence modeling toolkit written in `PyTorch 13 | `_ that allows researchers and developers to 14 | train custom models for translation, summarization, language modeling and other 15 | text generation tasks. 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | :caption: Getting Started 20 | 21 | getting_started 22 | command_line_tools 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Extending Fairseq 27 | 28 | overview 29 | tutorial_simple_lstm 30 | tutorial_classifying_names 31 | 32 | .. toctree:: 33 | :maxdepth: 2 34 | :caption: Library Reference 35 | 36 | tasks 37 | models 38 | criterions 39 | optim 40 | lr_scheduler 41 | data 42 | modules 43 | 44 | 45 | Indices and tables 46 | ================== 47 | 48 | * :ref:`genindex` 49 | * :ref:`search` 50 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/sentencepiece_bpe.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq import file_utils 7 | from fairseq.data.encoders import register_bpe 8 | 9 | 10 | @register_bpe('sentencepiece') 11 | class SentencepieceBPE(object): 12 | 13 | @staticmethod 14 | def add_args(parser): 15 | # fmt: off 16 | parser.add_argument('--sentencepiece-vocab', type=str, 17 | help='path to sentencepiece vocab') 18 | # fmt: on 19 | 20 | def __init__(self, args): 21 | vocab = file_utils.cached_path(args.sentencepiece_vocab) 22 | try: 23 | import sentencepiece as spm 24 | self.sp = spm.SentencePieceProcessor() 25 | self.sp.Load(vocab) 26 | except ImportError: 27 | raise ImportError('Please install sentencepiece with: pip install sentencepiece') 28 | 29 | def encode(self, x: str) -> str: 30 | return ' '.join(self.sp.EncodeAsPieces(x)) 31 | 32 | def decode(self, x: str) -> str: 33 | return x.replace(' ', '').replace('\u2581', ' ').strip() 34 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_baseline_he_2020/label_all_110_150.csv: -------------------------------------------------------------------------------- 1 | correct,formal,formal 2 | incorrect,formal,informal 3 | incorrect,informal,formal 4 | incorrect,formal,informal 5 | incorrect,informal,formal 6 | correct,formal,formal 7 | incorrect,informal,formal 8 | incorrect,informal,formal 9 | correct,formal,formal 10 | incorrect,informal,formal 11 | incorrect,informal,formal 12 | incorrect,informal,formal 13 | correct,formal,formal 14 | correct,informal,informal 15 | correct,formal,formal 16 | incorrect,informal,formal 17 | incorrect,formal,informal 18 | incorrect,informal,formal 19 | incorrect,informal,formal 20 | incorrect,informal,formal 21 | incorrect,informal,formal 22 | incorrect,informal,formal 23 | correct,formal,formal 24 | incorrect,informal,formal 25 | incorrect,formal,informal 26 | incorrect,informal,formal 27 | incorrect,formal,informal 28 | incorrect,informal,formal 29 | correct,informal,informal 30 | correct,informal,informal 31 | incorrect,formal,informal 32 | incorrect,formal,informal 33 | correct,formal,formal 34 | incorrect,informal,formal 35 | incorrect,formal,informal 36 | correct,informal,informal 37 | correct,informal,informal 38 | 
incorrect,formal,informal 39 | incorrect,informal,formal 40 | incorrect,formal,informal 41 | -------------------------------------------------------------------------------- /fairseq/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq) 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | ## License 26 | By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq), 27 | you agree that your contributions will be licensed under the LICENSE file in 28 | the root directory of this source tree. 29 | -------------------------------------------------------------------------------- /fairseq/scripts/compare_namespaces.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Helper script to compare two argparse.Namespace objects.""" 3 | 4 | from argparse import Namespace # noqa 5 | 6 | 7 | def main(): 8 | 9 | ns1 = eval(input('Namespace 1: ')) 10 | ns2 = eval(input('Namespace 2: ')) 11 | 12 | def keys(ns): 13 | ks = set() 14 | for k in dir(ns): 15 | if not k.startswith('_'): 16 | ks.add(k) 17 | return ks 18 | 19 | k1 = keys(ns1) 20 | k2 = keys(ns2) 21 | 22 | def print_keys(ks, ns1, ns2=None): 23 | for k in ks: 24 | if ns2 is None: 25 | print('{}\t{}'.format(k, getattr(ns1, k, None))) 26 | else: 27 | print('{}\t{}\t{}'.format(k, getattr(ns1, k, None), getattr(ns2, k, None))) 28 | 29 | print('Keys unique to namespace 1:') 30 | print_keys(k1 - k2, ns1) 31 | print() 32 | 33 | print('Keys unique to namespace 2:') 34 | print_keys(k2 - k1, ns2) 35 | print() 36 | 37 | print('Overlapping keys with different values:') 38 | ks = [k for k in k1 & k2 if getattr(ns1, k, 'None') != getattr(ns2, k, 'None')] 39 | print_keys(ks, ns1, ns2) 40 | print() 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /fairseq/fairseq/tasks/translation_from_pretrained_xlm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq.data.legacy.masked_lm_dictionary import MaskedLMDictionary 7 | from fairseq.tasks.translation import TranslationTask 8 | 9 | from . 
import register_task 10 | 11 | 12 | @register_task("translation_from_pretrained_xlm") 13 | class TranslationFromPretrainedXLMTask(TranslationTask): 14 | """ 15 | Same as TranslationTask except use the MaskedLMDictionary class so that 16 | we can load data that was binarized with the MaskedLMDictionary class. 17 | 18 | This task should be used for the entire training pipeline when we want to 19 | train an NMT model from a pretrained XLM checkpoint: binarizing NMT data, 20 | training NMT with the pretrained XLM checkpoint, and subsequent evaluation 21 | of that trained model. 22 | """ 23 | 24 | @classmethod 25 | def load_dictionary(cls, filename): 26 | """Load the masked LM dictionary from the filename 27 | 28 | Args: 29 | filename (str): the filename 30 | """ 31 | return MaskedLMDictionary.load(filename) 32 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/fastbpe.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq import file_utils 7 | from fairseq.data.encoders import register_bpe 8 | 9 | 10 | @register_bpe('fastbpe') 11 | class fastBPE(object): 12 | 13 | @staticmethod 14 | def add_args(parser): 15 | # fmt: off 16 | parser.add_argument('--bpe-codes', type=str, 17 | help='path to fastBPE BPE') 18 | # fmt: on 19 | 20 | def __init__(self, args): 21 | if args.bpe_codes is None: 22 | raise ValueError('--bpe-codes is required for --bpe=subword_nmt') 23 | codes = file_utils.cached_path(args.bpe_codes) 24 | try: 25 | import fastBPE 26 | self.bpe = fastBPE.fastBPE(codes) 27 | self.bpe_symbol = "@@ " 28 | except ImportError: 29 | raise ImportError('Please install fastBPE with: pip install fastBPE') 30 | 31 | def encode(self, x: str) -> str: 32 | return self.bpe.apply([x])[0] 33 | 34 | def decode(self, x: str) -> str: 35 | return (x + ' ').replace(self.bpe_symbol, '').rstrip() 36 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/prepend_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import BaseWrapperDataset 10 | 11 | 12 | class PrependTokenDataset(BaseWrapperDataset): 13 | 14 | def __init__(self, dataset, token=None): 15 | super().__init__(dataset) 16 | self.token = token 17 | if token is not None: 18 | self._sizes = np.array(dataset.sizes) + 1 19 | else: 20 | self._sizes = dataset.sizes 21 | 22 | def __getitem__(self, idx): 23 | item = self.dataset[idx] 24 | if self.token is not None: 25 | item = torch.cat([item.new([self.token]), item]) 26 | return item 27 | 28 | @property 29 | def sizes(self): 30 | return self._sizes 31 | 32 | def num_tokens(self, index): 33 | n = self.dataset.num_tokens(index) 34 | if self.token is not None: 35 | n += 1 36 | return n 37 | 38 | def size(self, index): 39 | n = self.dataset.size(index) 40 | if self.token is not None: 41 | n += 1 42 | return n 43 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/ctrl.rst: -------------------------------------------------------------------------------- 1 | CTRL 2 | ---------------------------------------------------- 3 | 4 | Note: if you fine-tune a CTRL model using the Salesforce code (https://github.com/salesforce/ctrl), 5 | you'll be able to convert from TF to our HuggingFace/Transformers format using the 6 | ``convert_tf_to_huggingface_pytorch.py`` script (see `issue #1654 `_). 7 | 8 | 9 | ``CTRLConfig`` 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | .. autoclass:: transformers.CTRLConfig 13 | :members: 14 | 15 | 16 | ``CTRLTokenizer`` 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: transformers.CTRLTokenizer 20 | :members: 21 | 22 | 23 | ``CTRLModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.CTRLModel 27 | :members: 28 | 29 | 30 | ``CTRLLMHeadModel`` 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | .. autoclass:: transformers.CTRLLMHeadModel 34 | :members: 35 | 36 | 37 | ``TFCTRLModel`` 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: transformers.TFCTRLModel 41 | :members: 42 | 43 | 44 | ``TFCTRLLMHeadModel`` 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. autoclass:: transformers.TFCTRLLMHeadModel 48 | :members: 49 | 50 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/roberta.rst: -------------------------------------------------------------------------------- 1 | RoBERTa 2 | ---------------------------------------------------- 3 | 4 | ``RobertaConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.RobertaConfig 8 | :members: 9 | 10 | 11 | ``RobertaTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.RobertaTokenizer 15 | :members: 16 | 17 | 18 | ``RobertaModel`` 19 | ~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.RobertaModel 22 | :members: 23 | 24 | 25 | ``RobertaForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.RobertaForMaskedLM 29 | :members: 30 | 31 | 32 | ``RobertaForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.RobertaForSequenceClassification 36 | :members: 37 | 38 | 39 | ``TFRobertaModel`` 40 | ~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFRobertaModel 43 | :members: 44 | 45 | 46 | ``TFRobertaForMaskedLM`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. 
autoclass:: transformers.TFRobertaForMaskedLM 50 | :members: 51 | 52 | 53 | ``TFRobertaForSequenceClassification`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFRobertaForSequenceClassification 57 | :members: 58 | -------------------------------------------------------------------------------- /fairseq/fairseq/pdb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import multiprocessing 7 | import os 8 | import pdb 9 | import sys 10 | 11 | 12 | __all__ = ['set_trace'] 13 | 14 | 15 | _stdin = [None] 16 | _stdin_lock = multiprocessing.Lock() 17 | try: 18 | _stdin_fd = sys.stdin.fileno() 19 | except Exception: 20 | _stdin_fd = None 21 | 22 | 23 | class MultiprocessingPdb(pdb.Pdb): 24 | """A Pdb wrapper that works in a multiprocessing environment. 25 | 26 | Usage: `from fairseq import pdb; pdb.set_trace()` 27 | """ 28 | 29 | def __init__(self): 30 | pdb.Pdb.__init__(self, nosigint=True) 31 | 32 | def _cmdloop(self): 33 | stdin_bak = sys.stdin 34 | with _stdin_lock: 35 | try: 36 | if _stdin_fd is not None: 37 | if not _stdin[0]: 38 | _stdin[0] = os.fdopen(_stdin_fd) 39 | sys.stdin = _stdin[0] 40 | self.cmdloop() 41 | finally: 42 | sys.stdin = stdin_bak 43 | 44 | 45 | def set_trace(): 46 | pdb = MultiprocessingPdb() 47 | pdb.set_trace(sys._getframe().f_back) 48 | -------------------------------------------------------------------------------- /datasets/prepare_paraphrase_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | import os 4 | import random 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--input_file", default=None, type=str, 8 | help="Input TSV file.") 9 | parser.add_argument("--output_folder", default=None, type=str, 10 | help="Output folder.") 11 | parser.add_argument("--train_fraction", default=0.95, type=float, 12 | help="Fraction of pairs to put in training split.") 13 | args = parser.parse_args() 14 | 15 | with open(args.input_file, "r") as f: 16 | data = [x.split("\t") for x in f.read().strip().split("\n")] 17 | 18 | output_data = [] 19 | 20 | for dd in data: 21 | output_data.append(( 22 | None, None, None, dd[0], dd[1], None, None, None, None 23 | )) 24 | 25 | random.seed(43) 26 | random.shuffle(output_data) 27 | 28 | num_train = int(args.train_fraction * len(output_data)) 29 | train_data = output_data[:num_train] 30 | dev_data = output_data[num_train:] 31 | 32 | os.makedirs(args.output_folder, exist_ok=True) 33 | 34 | with open(os.path.join(args.output_folder, "train.pickle"), "wb") as f: 35 | pickle.dump(train_data, f) 36 | 37 | with open(os.path.join(args.output_folder, "dev.pickle"), "wb") as f: 38 | pickle.dump(dev_data, f) 39 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/gpt.rst: -------------------------------------------------------------------------------- 1 | OpenAI GPT 2 | ---------------------------------------------------- 3 | 4 | ``OpenAIGPTConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.OpenAIGPTConfig 8 | :members: 9 | 10 | 11 | ``OpenAIGPTTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. 
autoclass:: transformers.OpenAIGPTTokenizer 15 |     :members: 16 | 17 | 18 | ``OpenAIGPTModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.OpenAIGPTModel 22 |     :members: 23 | 24 | 25 | ``OpenAIGPTLMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.OpenAIGPTLMHeadModel 29 |     :members: 30 | 31 | 32 | ``OpenAIGPTDoubleHeadsModel`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.OpenAIGPTDoubleHeadsModel 36 |     :members: 37 | 38 | 39 | ``TFOpenAIGPTModel`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.TFOpenAIGPTModel 43 |     :members: 44 | 45 | 46 | ``TFOpenAIGPTLMHeadModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFOpenAIGPTLMHeadModel 50 |     :members: 51 | 52 | 53 | ``TFOpenAIGPTDoubleHeadsModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFOpenAIGPTDoubleHeadsModel 57 |     :members: 58 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve PyTorch Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🐛 Bug 11 | 12 | 13 | 14 | Model I am using (Bert, XLNet....): 15 | 16 | Language I am using the model on (English, Chinese....): 17 | 18 | The problem arises when using: 19 | * [ ] the official example scripts: (give details) 20 | * [ ] my own modified scripts: (give details) 21 | 22 | The task I am working on is: 23 | * [ ] an official GLUE/SQuAD task: (give the name) 24 | * [ ] my own task or dataset: (give details) 25 | 26 | ## To Reproduce 27 | 28 | Steps to reproduce the behavior: 29 | 30 | 1. 31 | 2. 32 | 3. 33 | 34 | 35 | 36 | ## Expected behavior 37 | 38 | 39 | 40 | ## Environment 41 | 42 | * OS: 43 | * Python version: 44 | * PyTorch version: 45 | * PyTorch Transformers version (or branch): 46 | * Using GPU ? 47 | * Distributed or parallel setup ?
48 | * Any other relevant information: 49 | 50 | ## Additional context 51 | 52 | 53 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/scripts/eval_formality_baselines.sh: -------------------------------------------------------------------------------- 1 | # export CUDA_VISIBLE_DEVICES=0 2 | 3 | split="test" 4 | path=$1/transfer_entire_${split}.txt 5 | 6 | printf "\nRoBERTa ${split} classification\n\n" 7 | python style_paraphrase/evaluation/scripts/roberta_classify.py --input_file $path --label_file $1/all_test_transfer_labels.txt --model_dir style_paraphrase/evaluation/accuracy/formality_classifier --model_data_dir style_paraphrase/evaluation/accuracy/formality_classifier/formality-data-bin 8 | 9 | printf "\nRoBERTa acceptability classification\n\n" 10 | python style_paraphrase/evaluation/scripts/acceptability.py --input_file $path 11 | 12 | printf "\nParaphrase scores --- generated vs gold..\n\n" 13 | python style_paraphrase/evaluation/scripts/get_paraphrase_similarity.py --generated_path ${path} --reference_strs ref0,ref1,ref2,ref3 --reference_paths datasets/formality/raw/${split}.ref0,datasets/formality/raw/${split}.ref1,datasets/formality/raw/${split}.ref2,datasets/formality/raw/${split}.ref3 --output_path ${1}/generated_vs_gold.txt --store_scores 14 | 15 | printf "\nnormalized paraphrase score vs gold..\n\n" 16 | python style_paraphrase/evaluation/scripts/micro_eval.py --classifier_file ${path}.roberta_labels --paraphrase_file ${path}.pp_scores --generated_file ${path} --acceptability_file ${path}.acceptability_labels 17 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/adagrad.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.optim 7 | 8 | from . import FairseqOptimizer, register_optimizer 9 | 10 | 11 | @register_optimizer('adagrad') 12 | class Adagrad(FairseqOptimizer): 13 | def __init__(self, args, params): 14 | super().__init__(args) 15 | self._optimizer = torch.optim.Adagrad(params, **self.optimizer_config) 16 | 17 | @staticmethod 18 | def add_args(parser): 19 | """Add optimizer-specific arguments to the parser.""" 20 | # fmt: off 21 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 22 | help='weight decay') 23 | # fmt: on 24 | 25 | @property 26 | def optimizer_config(self): 27 | """ 28 | Return a kwarg dictionary that will be used to override optimizer 29 | args stored in checkpoints. This allows us to load a checkpoint and 30 | resume training using a different set of optimizer args, e.g., with a 31 | different learning rate. 
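        For example (a sketch; ``optimizer`` below stands for an instance of this
        class, and the values assume ``--lr 0.1 --weight-decay 0.01`` was passed on
        the command line)::

            # args.lr is parsed as a list of learning rates, hence lr[0]
            >>> optimizer.optimizer_config
            {'lr': 0.1, 'weight_decay': 0.01}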
32 |         """ 33 |         return { 34 |             'lr': self.args.lr[0], 35 |             'weight_decay': self.args.weight_decay, 36 |         } 37 | -------------------------------------------------------------------------------- /style_paraphrase/run_evaluate_gpt2_template.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=eval_gpt2_{job_id} 3 | #SBATCH -o style_paraphrase/logs/log_eval_{job_id}.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=titanx-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=5 8 | #SBATCH --mem=45GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- {top_details} 12 | # Run Details :- {lower_details} 13 | 14 | export DATA_DIR={dataset} 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 |     --output_dir=$BASE_DIR/saved_models/model_{job_id} \ 20 |     --model_type=gpt2 \ 21 |     --model_name_or_path={model_name} \ 22 |     --data_dir=$DATA_DIR \ 23 |     --do_eval \ 24 |     --do_delete_old \ 25 |     --save_steps 1000 \ 26 |     --logging_steps 1000 \ 27 |     --save_total_limit 3 \ 28 |     --evaluate_during_training \ 29 |     --num_train_epochs {num_epochs} \ 30 |     --gradient_accumulation_steps {accumulation} \ 31 |     --per_gpu_train_batch_size {batch_size} \ 32 |     --limit_examples 1000 \ 33 |     --job_id {job_id} \ 34 |     --learning_rate {learning_rate} \ 35 |     --prefix_input_type {prefix_input_type} \ 36 |     --global_dense_feature_list {global_dense_feature_list} \ 37 |     --specific_style_train {specific_style_train} \ 38 |     --eval_frequency_min 30 39 | -------------------------------------------------------------------------------- /transformers/.github/ISSUE_TEMPLATE/migration.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Migration from PyTorch-pretrained-Bert" 3 | about: Report a problem when migrating from PyTorch-pretrained-Bert to Transformers 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 📚 Migration 11 | 12 | 13 | 14 | Model I am using (Bert, XLNet....): 15 | 16 | Language I am using the model on (English, Chinese....): 17 | 18 | The problem arises when using: 19 | * [ ] the official example scripts: (give details) 20 | * [ ] my own modified scripts: (give details) 21 | 22 | The task I am working on is: 23 | * [ ] an official GLUE/SQuAD task: (give the name) 24 | * [ ] my own task or dataset: (give details) 25 | 26 | Details of the issue: 27 | 28 | 29 | 30 | ## Environment 31 | 32 | * OS: 33 | * Python version: 34 | * PyTorch version: 35 | * PyTorch Transformers version (or branch): 36 | * Using GPU ? 37 | * Distributed or parallel setup ? 38 | * Any other relevant information: 39 | 40 | ## Checklist 41 | 42 | - [ ] I have read the migration guide in the readme. 43 | - [ ] I checked if a related official extension example runs on my machine.
44 | 45 | ## Additional context 46 | 47 | 48 | -------------------------------------------------------------------------------- /style_paraphrase/hyperparameters_config.py: -------------------------------------------------------------------------------- 1 | paraphrase = [ 2 | [('model_name',), ['gpt2-large']], 3 | [('dataset',), ["datasets/paranmt_filtered"]], 4 | [('batch_size',), [5]], 5 | [('accumulation',), [4]], 6 | [('num_epochs',), [2]], 7 | [('beam_size',), [1]], 8 | [('eval_batch_size',), [1]], 9 | [('learning_rate',), ["5e-5"]], 10 | [('gpu',), ["m40"]], 11 | [('ngpus',), ["1"]], 12 | [('prefix_input_type',), ["original"]], 13 | [('global_dense_feature_list',), ["none"]], 14 | [('stop_token',), ["eos"]], 15 | [('save_steps',), [500]], 16 | [('save_total_limit',), [-1]], 17 | [('specific_style_train',), [-1]], 18 | [('optimizer',), ["adam"]] 19 | ] 20 | 21 | inverse_paraphrase = [ 22 | [('model_name',), ['gpt2']], 23 | [('dataset',), ["datasets/formality"]], 24 | [('batch_size',), [5]], 25 | [('accumulation',), [2]], 26 | [('num_epochs',), [3]], 27 | [('beam_size',), [1]], 28 | [('eval_batch_size',), [1]], 29 | [('learning_rate',), ["5e-5"]], 30 | [('gpu',), ["m40"]], 31 | [('ngpus',), ["1"]], 32 | [('prefix_input_type',), ["paraphrase_250"]], 33 | [('global_dense_feature_list',), ["none"]], 34 | [('stop_token',), ["eos"]], 35 | [('specific_style_train',), [0, 1]], 36 | [('save_steps',), [500]], 37 | [('save_total_limit',), [-1]], 38 | [('optimizer',), ["adam"]] 39 | ] 40 | -------------------------------------------------------------------------------- /transformers/transformers/configuration_camembert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ CamemBERT configuration """ 17 | 18 | from __future__ import (absolute_import, division, print_function, 19 | unicode_literals) 20 | 21 | import logging 22 | 23 | from .configuration_roberta import RobertaConfig 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 28 | 'camembert-base': "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", 29 | } 30 | 31 | 32 | class CamembertConfig(RobertaConfig): 33 | pretrained_config_archive_map = CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP 34 | -------------------------------------------------------------------------------- /transformers/examples/tests_samples/MRPC/dev.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 
3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 8 | -------------------------------------------------------------------------------- /transformers/examples/tests_samples/MRPC/train.tsv: -------------------------------------------------------------------------------- 1 | Quality #1 ID #2 ID #1 String #2 String 2 | 1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy . 3 | 0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war . 4 | 0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent . 5 | 1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries . 6 | 0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty . 7 | 1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status . 
8 | -------------------------------------------------------------------------------- /fairseq/examples/language_model/transformer_lm/README.md: -------------------------------------------------------------------------------- 1 | # Adaptive Input Representations for Neural Language Modeling (Baevski and Auli, 2018) 2 | 3 | ## Pre-trained models 4 | 5 | Description | Parameters | Dataset | Model and Test set(s) 6 | ---|---:|---|--- 7 | Adaptive Inputs ([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853)) | 1026M | [Google Billion Words](https://github.com/ciprian-chelba/1-billion-word-language-modeling-benchmark) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_gbw_huge.bz2) 8 | Adaptive Inputs ([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853)) | 247M | [WikiText-103](https://einstein.ai/research/the-wikitext-long-term-dependency-language-modeling-dataset) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_wiki103.bz2) 9 | 10 | ## Example usage 11 | 12 | See the [language modeling README](../README.md) for instructions on reproducing results for WikiText-103 13 | using the `transformer_lm_wiki103` model architecture. 14 | 15 | ## Citation 16 | 17 | ```bibtex 18 | @inproceedings{ 19 | baevski2018adaptive, 20 | title={Adaptive Input Representations for Neural Language Modeling}, 21 | author={Alexei Baevski and Michael Auli}, 22 | booktitle={International Conference on Learning Representations}, 23 | year={2019}, 24 | url={https://openreview.net/forum?id=ByxZX20qFQ}, 25 | } 26 | ``` 27 | -------------------------------------------------------------------------------- /fairseq/examples/language_model/conv_lm/README.md: -------------------------------------------------------------------------------- 1 | # Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017) 2 | 3 | ## Example usage 4 | 5 | First download and preprocess the data following the main [language modeling 6 | README](../README.md). 
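For reference, the download and preprocessing step usually looks like the sketch below (paths and flags assumed from the main README and the `prepare-wikitext-103.sh` script in `examples/language_model/`; adjust them to your setup):

```bash
# Download and tokenize WikiText-103
cd examples/language_model/
bash prepare-wikitext-103.sh
cd ../..

# Binarize the data for fairseq
TEXT=examples/language_model/wikitext-103
fairseq-preprocess --only-source \
    --trainpref $TEXT/wiki.train.tokens \
    --validpref $TEXT/wiki.valid.tokens \
    --testpref $TEXT/wiki.test.tokens \
    --destdir data-bin/wikitext-103 \
    --workers 20
```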
7 | 8 | Then to train a convolutional LM using the `fconv_lm_dauphin_wikitext103` 9 | architecture: 10 | ```bash 11 | fairseq-train --task language_modeling \ 12 | data-bin/wikitext-103 \ 13 | --save-dir checkpoints/fconv_wikitext-103 \ 14 | --arch fconv_lm_dauphin_wikitext103 \ 15 | --max-epoch 35 \ --optimizer nag \ 16 | --lr 1.0 --lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \ 17 | --clip-norm 0.1 --dropout 0.2 --weight-decay 5e-06 --criterion adaptive_loss \ 18 | --adaptive-softmax-cutoff 10000,20000,200000 --max-tokens 1024 --tokens-per-sample 1024 \ 19 | --ddp-backend=no_c10d 20 | ``` 21 | 22 | And evaluate with: 23 | ```bash 24 | fairseq-eval-lm data-bin/wikitext-103 --path checkpoints/fconv_wiki103/checkpoint_best.pt 25 | ``` 26 | 27 | ## Citation 28 | 29 | ```bibtex 30 | @inproceedings{dauphin2017language, 31 | title={Language Modeling with Gated Convolutional Networks}, 32 | author={Dauphin, Yann N and Fan, Angela and Auli, Michael and Grangier, David}, 33 | booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70}, 34 | pages={933--941}, 35 | year={2017}, 36 | organization={JMLR} 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /mturk_evals/shakespeare_gold_vs_generated_single_model_nucleus_paraphrase/label_all_104_150.csv: -------------------------------------------------------------------------------- 1 | incorrect,modern,original 2 | incorrect,modern,original 3 | correct,original,original 4 | incorrect,modern,original 5 | correct,original,original 6 | correct,original,original 7 | incorrect,modern,original 8 | incorrect,original,modern 9 | correct,modern,modern 10 | incorrect,modern,original 11 | incorrect,modern,original 12 | incorrect,modern,original 13 | incorrect,original,modern 14 | correct,modern,modern 15 | incorrect,original,modern 16 | correct,original,original 17 | incorrect,original,modern 18 | incorrect,original,modern 19 | incorrect,original,modern 20 | incorrect,original,modern 21 | incorrect,original,modern 22 | incorrect,original,modern 23 | incorrect,original,modern 24 | correct,original,original 25 | incorrect,original,modern 26 | incorrect,original,modern 27 | correct,original,original 28 | correct,modern,modern 29 | incorrect,original,modern 30 | correct,modern,modern 31 | incorrect,modern,original 32 | correct,original,original 33 | incorrect,modern,original 34 | incorrect,modern,original 35 | incorrect,original,modern 36 | incorrect,modern,original 37 | incorrect,original,modern 38 | incorrect,original,modern 39 | incorrect,original,modern 40 | correct,modern,modern 41 | correct,modern,modern 42 | incorrect,modern,original 43 | incorrect,original,modern 44 | incorrect,original,modern 45 | incorrect,original,modern 46 | incorrect,original,modern 47 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/positional_embedding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import torch.nn as nn 7 | 8 | from .learned_positional_embedding import LearnedPositionalEmbedding 9 | from .sinusoidal_positional_embedding import SinusoidalPositionalEmbedding 10 | 11 | 12 | def PositionalEmbedding( 13 | num_embeddings: int, 14 | embedding_dim: int, 15 | padding_idx: int, 16 | learned: bool = False, 17 | ): 18 | if learned: 19 | # if padding_idx is specified then offset the embedding ids by 20 | # this index and adjust num_embeddings appropriately 21 | # TODO: The right place for this offset would be inside 22 | # LearnedPositionalEmbedding. Move this there for a cleaner implementation. 23 | if padding_idx is not None: 24 | num_embeddings = num_embeddings + padding_idx + 1 25 | m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) 26 | nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) 27 | if padding_idx is not None: 28 | nn.init.constant_(m.weight[padding_idx], 0) 29 | else: 30 | m = SinusoidalPositionalEmbedding( 31 | embedding_dim, padding_idx, init_size=num_embeddings + padding_idx + 1, 32 | ) 33 | return m 34 | -------------------------------------------------------------------------------- /fairseq/docs/data.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | .. module:: fairseq.data 5 | 6 | Data Loading and Utilities 7 | ========================== 8 | 9 | .. _datasets: 10 | 11 | Datasets 12 | -------- 13 | 14 | **Datasets** define the data format and provide helpers for creating 15 | mini-batches. 16 | 17 | .. autoclass:: fairseq.data.FairseqDataset 18 | :members: 19 | .. autoclass:: fairseq.data.LanguagePairDataset 20 | :members: 21 | .. autoclass:: fairseq.data.MonolingualDataset 22 | :members: 23 | 24 | **Helper Datasets** 25 | 26 | These datasets wrap other :class:`fairseq.data.FairseqDataset` instances and 27 | provide additional functionality: 28 | 29 | .. autoclass:: fairseq.data.BacktranslationDataset 30 | :members: 31 | .. autoclass:: fairseq.data.ConcatDataset 32 | :members: 33 | .. autoclass:: fairseq.data.ResamplingDataset 34 | :members: 35 | .. autoclass:: fairseq.data.RoundRobinZipDatasets 36 | :members: 37 | .. autoclass:: fairseq.data.TransformEosDataset 38 | :members: 39 | 40 | 41 | Dictionary 42 | ---------- 43 | 44 | .. autoclass:: fairseq.data.Dictionary 45 | :members: 46 | 47 | 48 | Iterators 49 | --------- 50 | 51 | .. autoclass:: fairseq.data.CountingIterator 52 | :members: 53 | .. autoclass:: fairseq.data.EpochBatchIterator 54 | :members: 55 | .. autoclass:: fairseq.data.GroupedIterator 56 | :members: 57 | .. 
autoclass:: fairseq.data.ShardedIterator 58 |     :members: 59 | -------------------------------------------------------------------------------- /paraphrase_many.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import torch 4 | import tqdm 5 | 6 | from style_paraphrase.inference_utils import GPT2Generator 7 | 8 | parser = argparse.ArgumentParser() 9 | 10 | # Base parameters 11 | parser.add_argument("--batch_size", default=64, type=int, 12 |                     help="Batch size for inference.") 13 | parser.add_argument('--model_dir', default="paraphraser_gpt2_large", type=str) 14 | parser.add_argument('--top_p_value', default=0.6, type=float) 15 | parser.add_argument("--input", default=None, type=str, required=True, 16 |                     help="The input file.") 17 | parser.add_argument("--output", default=None, type=str, required=True, 18 |                     help="The output file.") 19 | 20 | args = parser.parse_args() 21 | 22 | if not torch.cuda.is_available(): 23 |     print("Please check if a GPU is available or your Pytorch installation is correct.") 24 |     sys.exit() 25 | 26 | with open(args.input, "r") as f: 27 |     data = f.read().strip().split("\n") 28 | 29 | print("Loading paraphraser...") 30 | paraphraser = GPT2Generator(args.model_dir, upper_length="same_5") 31 | paraphraser.modify_p(top_p=args.top_p_value) 32 | 33 | outputs = [] 34 | for i in tqdm.tqdm(range(0, len(data), args.batch_size), desc="minibatches done..."): 35 |     generations, _ = paraphraser.generate_batch(data[i:i + args.batch_size]) 36 |     outputs.extend(generations) 37 | 38 | with open(args.output, "w") as f: 39 |     f.write("\n".join(outputs) + "\n") 40 | -------------------------------------------------------------------------------- /transformers/docs/source/bertology.rst: -------------------------------------------------------------------------------- 1 | BERTology 2 | --------- 3 | 4 | There is a growing field of study concerned with investigating the inner workings of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are: 5 | 6 | 7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950 8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650 9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341 10 | 11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650): 12 | 13 | 14 | * accessing all the hidden-states of BERT/GPT/GPT-2, 15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2, 16 | * retrieving head output values and gradients to be able to compute head importance scores and prune heads as explained in https://arxiv.org/abs/1905.10650. 17 | 18 | To help you understand and use these features, we have added a specific example script: `bertology.py `_ which extracts information and prunes a model pre-trained on GLUE. 19 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/conv_tbc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | from torch.nn.modules.utils import _single 8 | 9 | 10 | class ConvTBC(torch.nn.Module): 11 | """1D convolution over an input of shape (time x batch x channel) 12 | 13 | The implementation uses gemm to perform the convolution. This implementation 14 | is faster than cuDNN for small kernel sizes. 15 | """ 16 | def __init__(self, in_channels, out_channels, kernel_size, padding=0): 17 | super(ConvTBC, self).__init__() 18 | self.in_channels = in_channels 19 | self.out_channels = out_channels 20 | self.kernel_size = _single(kernel_size) 21 | self.padding = _single(padding) 22 | 23 | self.weight = torch.nn.Parameter(torch.Tensor( 24 | self.kernel_size[0], in_channels, out_channels)) 25 | self.bias = torch.nn.Parameter(torch.Tensor(out_channels)) 26 | 27 | def forward(self, input): 28 | return torch.conv_tbc(input.contiguous(), self.weight, self.bias, self.padding[0]) 29 | 30 | def __repr__(self): 31 | s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}' 32 | ', padding={padding}') 33 | if self.bias is None: 34 | s += ', bias=False' 35 | s += ')' 36 | return s.format(name=self.__class__.__name__, **self.__dict__) 37 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/sgd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.optim 7 | 8 | from . import FairseqOptimizer, register_optimizer 9 | 10 | 11 | @register_optimizer('sgd') 12 | class SGD(FairseqOptimizer): 13 | def __init__(self, args, params): 14 | super().__init__(args) 15 | self._optimizer = torch.optim.SGD(params, **self.optimizer_config) 16 | 17 | @staticmethod 18 | def add_args(parser): 19 | """Add optimizer-specific arguments to the parser.""" 20 | # fmt: off 21 | parser.add_argument('--momentum', default=0.0, type=float, metavar='M', 22 | help='momentum factor') 23 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 24 | help='weight decay') 25 | # fmt: on 26 | 27 | @property 28 | def optimizer_config(self): 29 | """ 30 | Return a kwarg dictionary that will be used to override optimizer 31 | args stored in checkpoints. This allows us to load a checkpoint and 32 | resume training using a different set of optimizer args, e.g., with a 33 | different learning rate. 
34 | """ 35 | return { 36 | 'lr': self.args.lr[0], 37 | 'momentum': self.args.momentum, 38 | 'weight_decay': self.args.weight_decay, 39 | } 40 | -------------------------------------------------------------------------------- /style_paraphrase/run_finetune_gpt2_template.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_{job_id} 3 | #SBATCH -o style_paraphrase/logs/log_{job_id}.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition={gpu}-long 6 | #SBATCH --gres=gpu:{ngpus} 7 | #SBATCH --cpus-per-task={cpus} 8 | #SBATCH --mem={memory}GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- {top_details} 12 | # Run Details :- {lower_details} 13 | 14 | export DATA_DIR={dataset} 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | # Snapshot code used for the run 19 | mkdir -p $BASE_DIR/saved_models/model_{job_id}_code 20 | 21 | cp $BASE_DIR/*.py $BASE_DIR/saved_models/model_{job_id}_code 22 | 23 | echo $HOSTNAME 24 | 25 | python -m torch.distributed.launch --nproc_per_node={ngpus} $BASE_DIR/run_lm_finetuning.py \ 26 | --output_dir=$BASE_DIR/saved_models/model_{job_id} \ 27 | --model_type=gpt2 \ 28 | --model_name_or_path={model_name} \ 29 | --do_train \ 30 | --data_dir=$DATA_DIR \ 31 | --save_steps {save_steps} \ 32 | --logging_steps 20 \ 33 | --save_total_limit {save_total_limit} \ 34 | --evaluate_during_training \ 35 | --num_train_epochs {num_epochs} \ 36 | --gradient_accumulation_steps {accumulation} \ 37 | --per_gpu_train_batch_size {batch_size} \ 38 | --job_id {job_id} \ 39 | --learning_rate {learning_rate} \ 40 | --prefix_input_type {prefix_input_type} \ 41 | --global_dense_feature_list {global_dense_feature_list} \ 42 | --specific_style_train {specific_style_train} \ 43 | --optimizer {optimizer} 44 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/lr_scheduler/fairseq_lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .. 
import FairseqOptimizer 7 | 8 | 9 | class FairseqLRScheduler(object): 10 | 11 | def __init__(self, args, optimizer): 12 | super().__init__() 13 | if not isinstance(optimizer, FairseqOptimizer): 14 | raise ValueError('optimizer must be an instance of FairseqOptimizer') 15 | self.args = args 16 | self.optimizer = optimizer 17 | self.best = None 18 | 19 | @staticmethod 20 | def add_args(parser): 21 | """Add arguments to the parser for this LR scheduler.""" 22 | pass 23 | 24 | def state_dict(self): 25 | """Return the LR scheduler state dict.""" 26 | return {'best': self.best} 27 | 28 | def load_state_dict(self, state_dict): 29 | """Load an LR scheduler state dict.""" 30 | self.best = state_dict['best'] 31 | 32 | def step(self, epoch, val_loss=None): 33 | """Update the learning rate at the end of the given epoch.""" 34 | if val_loss is not None: 35 | if self.best is None: 36 | self.best = val_loss 37 | else: 38 | self.best = min(self.best, val_loss) 39 | 40 | def step_update(self, num_updates): 41 | """Update the learning rate after each update.""" 42 | return self.optimizer.get_lr() 43 | -------------------------------------------------------------------------------- /demo_paraphraser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | import torch 5 | 6 | from style_paraphrase.inference_utils import GPT2Generator 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--model_dir', default="paraphraser_gpt2_large", type=str) 10 | parser.add_argument('--top_p_value', default=0.6, type=float) 11 | args = parser.parse_args() 12 | 13 | if not torch.cuda.is_available(): 14 | print("Please check if a GPU is available or your Pytorch installation is correct.") 15 | sys.exit() 16 | 17 | print("Loading paraphraser...") 18 | paraphraser = GPT2Generator(args.model_dir, upper_length="same_5") 19 | 20 | print("\n\nNOTE: Ignore the weight mismatch error, this is due to different huggingface/transformer versions + minor modifications I did myself, shouldn't affect the paraphrases.\n\n") 21 | 22 | input_sentence = input("Enter your sentence, q to quit: ") 23 | 24 | while input_sentence != "q" and input_sentence != "quit" and input_sentence != "exit": 25 | paraphraser.modify_p(top_p=0.0) 26 | greedy_decoding = paraphraser.generate(input_sentence) 27 | print("\ngreedy sample:\n{}\n".format(greedy_decoding)) 28 | paraphraser.modify_p(top_p=args.top_p_value) 29 | top_p_60_samples, _ = paraphraser.generate_batch([input_sentence, input_sentence, input_sentence]) 30 | top_p_60_samples = "\n".join(top_p_60_samples) 31 | print("top_p = {:.2f} samples:\n{}\n".format(args.top_p_value, top_p_60_samples)) 32 | input_sentence = input("Enter your sentence, q to quit: ") 33 | 34 | print("Exiting...") 35 | -------------------------------------------------------------------------------- /mturk_evals/formality_gold_vs_generated_baseline_unmt/mturk_batches/batch_2.csv: -------------------------------------------------------------------------------- 1 | text1,text2 2 | 'I just called to say ' I Love you ' is just great !,I just wanna say " I just love you " is just a great song ! 3 | I need to know the name of the this movie from the 80 's !,I need to know the name of this movie from the movie " The Devil 's Rejects " . 4 | It 's one of the funniest cartoons !,It 's one of the greatest hits series ! 5 | I like both Lauren Hill and Mary J. Blige .,I like both of them and Big Brother ... but I like it . 
6 | He would be well advised not to let her go .,i would be willing to do not let her go . 7 | "Ahh I love the hair metal ... Motley Crue , Poison , Cinderella , Faster Pussycat , Slaughter , etc .","I love the hair of the heavy metal . Young Frankenstein , etc ." 8 | She used to sing with a band called M @-@ People .,She used to sing with a band called @-@ men . 9 | I love and am a big fan of Green Day .,I love and I am a big fan of that . 10 | i want 2 listen 2 good music but i dont knw wat 2 listen 2 ! ! ! ! ! ! ! ! ! !,It is hard to listen to the music but I do not listen to the two points ! 11 | Orlando Bloom and Chad Michael Murray are both really attractive !,Orlando Bloom : They are both really sexy ! 12 | let 's get together yeah yeah yeah !,"Let 's get together , yes , they are !" 13 | my friends talk about me because i still blast his music through my apartment on a regular basis,My friends talk about me because I still enjoy his music on a record label . 14 | -------------------------------------------------------------------------------- /transformers/docs/source/main_classes/optimizer_schedules.rst: -------------------------------------------------------------------------------- 1 | Optimizer 2 | ---------------------------------------------------- 3 | 4 | The ``.optimization`` module provides: 5 | 6 | - an optimizer with weight decay fixed that can be used to fine-tuned models, and 7 | - several schedules in the form of schedule objects that inherit from ``_LRSchedule``: 8 | 9 | ``AdamW`` 10 | ~~~~~~~~~~~~~~~~ 11 | 12 | .. autoclass:: transformers.AdamW 13 | :members: 14 | 15 | Schedules 16 | ---------------------------------------------------- 17 | 18 | Learning Rate Schedules 19 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 20 | 21 | .. autofunction:: transformers.get_constant_schedule 22 | 23 | 24 | .. autofunction:: transformers.get_constant_schedule_with_warmup 25 | 26 | .. image:: /imgs/warmup_constant_schedule.png 27 | :target: /imgs/warmup_constant_schedule.png 28 | :alt: 29 | 30 | 31 | .. autofunction:: transformers.get_cosine_schedule_with_warmup 32 | :members: 33 | 34 | .. image:: /imgs/warmup_cosine_schedule.png 35 | :target: /imgs/warmup_cosine_schedule.png 36 | :alt: 37 | 38 | 39 | .. autofunction:: transformers.get_cosine_with_hard_restarts_schedule_with_warmup 40 | 41 | .. image:: /imgs/warmup_cosine_hard_restarts_schedule.png 42 | :target: /imgs/warmup_cosine_hard_restarts_schedule.png 43 | :alt: 44 | 45 | 46 | 47 | .. autofunction:: transformers.get_linear_schedule_with_warmup 48 | 49 | .. image:: /imgs/warmup_linear_schedule.png 50 | :target: /imgs/warmup_linear_schedule.png 51 | :alt: 52 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/replace_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import BaseWrapperDataset 7 | 8 | 9 | class ReplaceDataset(BaseWrapperDataset): 10 | """Replaces tokens found in the dataset by a specified replacement token 11 | 12 | Args: 13 | dataset (~torch.utils.data.Dataset): dataset to replace tokens in 14 | replace_map(Dictionary[int,int]): map of token to replace -> replacement token 15 | offsets (List[int]): do not replace tokens before (from left if pos, right if neg) this offset. 
should be 16 | as many as the number of objects returned by the underlying dataset __getitem__ method. 17 | """ 18 | 19 | def __init__(self, dataset, replace_map, offsets): 20 | super().__init__(dataset) 21 | assert len(replace_map) > 0 22 | self.replace_map = replace_map 23 | self.offsets = offsets 24 | 25 | def __getitem__(self, index): 26 | item = self.dataset[index] 27 | is_tuple = isinstance(item, tuple) 28 | srcs = item if is_tuple else [item] 29 | 30 | for offset, src in zip(self.offsets, srcs): 31 | for k, v in self.replace_map.items(): 32 | src_off = src[offset:] if offset >= 0 else src[:offset] 33 | src_off.masked_fill_(src_off == k, v) 34 | 35 | item = srcs if is_tuple else srcs[0] 36 | return item 37 | -------------------------------------------------------------------------------- /fairseq/scripts/spm_decode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | from __future__ import absolute_import, division, print_function, unicode_literals 9 | 10 | import argparse 11 | 12 | import sentencepiece as spm 13 | 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("--model", required=True, 18 | help="sentencepiece model to use for decoding") 19 | parser.add_argument("--input", required=True, help="input file to decode") 20 | parser.add_argument("--input_format", choices=["piece", "id"], default="piece") 21 | args = parser.parse_args() 22 | 23 | sp = spm.SentencePieceProcessor() 24 | sp.Load(args.model) 25 | 26 | if args.input_format == "piece": 27 | def decode(l): 28 | return "".join(sp.DecodePieces(l)) 29 | elif args.input_format == "id": 30 | def decode(l): 31 | return "".join(sp.DecodeIds(l)) 32 | else: 33 | raise NotImplementedError 34 | 35 | def tok2int(tok): 36 | # remap reference-side <unk> (represented as <<unk>>) to 0 37 | return int(tok) if tok != "<<unk>>" else 0 38 | 39 | with open(args.input, "r", encoding="utf-8") as h: 40 | for line in h: 41 | print(decode(list(map(tok2int, line.rstrip().split())))) 42 | 43 | 44 | if __name__ == "__main__": 45 | main() 46 | -------------------------------------------------------------------------------- /fairseq/fairseq/criterions/fairseq_criterion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from torch.nn.modules.loss import _Loss 7 | 8 | 9 | class FairseqCriterion(_Loss): 10 | 11 | def __init__(self, args, task): 12 | super().__init__() 13 | self.args = args 14 | self.task = task 15 | self.padding_idx = task.target_dictionary.pad() if task.target_dictionary is not None else -100 16 | 17 | @staticmethod 18 | def add_args(parser): 19 | """Add criterion-specific arguments to the parser.""" 20 | pass 21 | 22 | @classmethod 23 | def build_criterion(cls, args, task): 24 | return cls(args, task) 25 | 26 | def forward(self, model, sample, reduce=True): 27 | """Compute the loss for the given sample. 
28 | 29 | Returns a tuple with three elements: 30 | 1) the loss 31 | 2) the sample size, which is used as the denominator for the gradient 32 | 3) logging outputs to display while training 33 | """ 34 | raise NotImplementedError 35 | 36 | @staticmethod 37 | def aggregate_logging_outputs(logging_outputs): 38 | """Aggregate logging outputs from data parallel training.""" 39 | raise NotImplementedError 40 | 41 | @staticmethod 42 | def grad_denom(sample_sizes): 43 | """Compute the gradient denominator for a set of sample sizes.""" 44 | return sum(sample_sizes) 45 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | blessings==1.7 2 | blis==0.4.1 3 | cached-property==1.5.2 4 | catalogue==1.0.0 5 | certifi==2020.6.20 6 | cffi==1.14.3 7 | chardet==3.0.4 8 | click==7.1.2 9 | cymem==2.0.4 10 | Cython==0.29.21 11 | dataclasses==0.6 12 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz 13 | filelock==3.0.12 14 | flake8==3.8.4 15 | Flask==1.1.2 16 | future==0.18.2 17 | gpustat==0.6.0 18 | idna==2.10 19 | importlib-metadata==2.0.0 20 | itsdangerous==1.1.0 21 | Jinja2==2.11.3 22 | joblib==0.17.0 23 | MarkupSafe==1.1.1 24 | mccabe==0.6.1 25 | more-itertools==8.6.0 26 | murmurhash==1.0.4 27 | nltk==3.5 28 | numpy==1.19.2 29 | nvidia-ml-py3==7.352.0 30 | ordered-set==3.1.1 31 | ordered-set-stubs==0.1.3 32 | packaging==20.4 33 | Paste==3.5.0 34 | pathtools==0.1.2 35 | Pillow==8.0.0 36 | plac==1.1.3 37 | poetry-version==0.1.5 38 | portalocker==2.0.0 39 | preshed==3.0.4 40 | profanity-filter==1.3.3 41 | protobuf==3.13.0 42 | psutil==5.7.3 43 | pycodestyle==2.6.0 44 | pycparser==2.20 45 | pydantic==1.7.4 46 | pyflakes==2.2.0 47 | pyparsing==2.4.7 48 | redis==3.5.3 49 | regex==2020.10.15 50 | requests==2.24.0 51 | ruamel.yaml==0.15.100 52 | sacrebleu==1.4.14 53 | sacremoses==0.0.43 54 | scipy==1.5.4 55 | sentencepiece==0.1.91 56 | six==1.15.0 57 | spacy==2.3.2 58 | srsly==1.0.3 59 | tensorboardX==2.1 60 | thinc==7.4.1 61 | tokenizers==0.9.2 62 | tomlkit==0.5.11 63 | tqdm==4.50.2 64 | transformers==3.4.0 65 | urllib3==1.25.11 66 | waitress==1.4.4 67 | wasabi==0.8.0 68 | watchdog==0.10.3 69 | Werkzeug==1.0.1 70 | zipp==3.4.0 71 | -------------------------------------------------------------------------------- /fairseq/scripts/read_binarized.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
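# Example invocation (paths are hypothetical; point --input at any fairseq-binarized split):
#   python fairseq/scripts/read_binarized.py --input data-bin/train --dict data-bin/dict.txt
# With --dict, each example is printed as dictionary tokens; without it, raw integer token ids are printed.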
6 | 7 | import argparse 8 | 9 | from fairseq.data import data_utils, Dictionary, indexed_dataset 10 | 11 | 12 | def get_parser(): 13 | parser = argparse.ArgumentParser( 14 | description='writes text from binarized file to stdout') 15 | # fmt: off 16 | parser.add_argument('--dataset-impl', help='dataset implementation', 17 | choices=indexed_dataset.get_available_dataset_impl()) 18 | parser.add_argument('--dict', metavar='FP', help='dictionary containing known words', default=None) 19 | parser.add_argument('--input', metavar='FP', required=True, help='binarized file to read') 20 | # fmt: on 21 | 22 | return parser 23 | 24 | 25 | def main(): 26 | parser = get_parser() 27 | args = parser.parse_args() 28 | 29 | dictionary = Dictionary.load(args.dict) if args.dict is not None else None 30 | dataset = data_utils.load_indexed_dataset( 31 | args.input, 32 | dictionary, 33 | dataset_impl=args.dataset_impl, 34 | default='lazy', 35 | ) 36 | 37 | for tensor_line in dataset: 38 | if dictionary is None: 39 | line = ' '.join([str(int(x)) for x in tensor_line]) 40 | else: 41 | line = dictionary.string(tensor_line) 42 | 43 | print(line) 44 | 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /style_paraphrase/evaluation/similarity/test_sim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from style_paraphrase.evaluation.similarity.sim_models import WordAveraging 3 | from style_paraphrase.evaluation.similarity.sim_utils import Example 4 | from nltk.tokenize import TreebankWordTokenizer 5 | import sentencepiece as spm 6 | 7 | tok = TreebankWordTokenizer() 8 | 9 | model = torch.load('style_paraphrase/evaluation/similarity/sim/sim.pt') 10 | state_dict = model['state_dict'] 11 | vocab_words = model['vocab_words'] 12 | args = model['args'] 13 | # turn off gpu 14 | model = WordAveraging(args, vocab_words) 15 | model.load_state_dict(state_dict, strict=True) 16 | sp = spm.SentencePieceProcessor() 17 | sp.Load('style_paraphrase/evaluation/similarity/sim/sim.sp.30k.model') 18 | model.eval() 19 | 20 | def make_example(sentence, model): 21 | sentence = sentence.lower() 22 | sentence = " ".join(tok.tokenize(sentence)) 23 | sentence = sp.EncodeAsPieces(sentence) 24 | wp1 = Example(" ".join(sentence)) 25 | wp1.populate_embeddings(model.vocab) 26 | return wp1 27 | 28 | def find_similarity(s1, s2): 29 | with torch.no_grad(): 30 | s1 = [make_example(x, model) for x in s1] 31 | s2 = [make_example(x, model) for x in s2] 32 | wx1, wl1, wm1 = model.torchify_batch(s1) 33 | wx2, wl2, wm2 = model.torchify_batch(s2) 34 | scores = model.scoring_function(wx1, wm1, wl1, wx2, wm2, wl2) 35 | return [x.item() for x in scores] 36 | 37 | # s1 = "the dog ran outsideddd." 38 | # s2 = "the puppy escape into the trees." 39 | # print(find_similarity([s1, s2], [s2, s2])) 40 | -------------------------------------------------------------------------------- /style_paraphrase/examples/formality/run_finetune_formality_0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_formality_0 3 | #SBATCH -o style_paraphrase/logs/log_formality_0.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2 model for formality. 
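# Submission :- intended for SLURM (see the #SBATCH directives above); e.g. run `sbatch style_paraphrase/examples/formality/run_finetune_formality_0.sh` from the repository root so that the relative log and dataset paths resolve.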
12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/formality, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = paraphrase_250, save_steps = 500, save_total_limit = -1, specific_style_train = 0, stop_token = eos 13 | 14 | export DATA_DIR=datasets/formality 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 | --output_dir=$BASE_DIR/saved_models/model_formality_0 \ 20 | --model_type=gpt2 \ 21 | --model_name_or_path=gpt2 \ 22 | --do_train \ 23 | --data_dir=$DATA_DIR \ 24 | --save_steps 500 \ 25 | --logging_steps 20 \ 26 | --save_total_limit -1 \ 27 | --evaluate_during_training \ 28 | --num_train_epochs 3 \ 29 | --gradient_accumulation_steps 2 \ 30 | --per_gpu_train_batch_size 5 \ 31 | --job_id formality_0 \ 32 | --learning_rate 5e-5 \ 33 | --prefix_input_type paraphrase_250 \ 34 | --global_dense_feature_list none \ 35 | --specific_style_train 0 \ 36 | --optimizer adam 37 | 38 | -------------------------------------------------------------------------------- /style_paraphrase/examples/formality/run_finetune_formality_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_formality_1 3 | #SBATCH -o style_paraphrase/logs/log_formality_1.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2 model for formality. 12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/formality, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = paraphrase_250, save_steps = 500, save_total_limit = -1, specific_style_train = 1, stop_token = eos 13 | 14 | export DATA_DIR=datasets/formality 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 | --output_dir=$BASE_DIR/saved_models/model_formality_1 \ 20 | --model_type=gpt2 \ 21 | --model_name_or_path=gpt2 \ 22 | --do_train \ 23 | --data_dir=$DATA_DIR \ 24 | --save_steps 500 \ 25 | --logging_steps 20 \ 26 | --save_total_limit -1 \ 27 | --evaluate_during_training \ 28 | --num_train_epochs 3 \ 29 | --gradient_accumulation_steps 2 \ 30 | --per_gpu_train_batch_size 5 \ 31 | --job_id formality_1 \ 32 | --learning_rate 5e-5 \ 33 | --prefix_input_type paraphrase_250 \ 34 | --global_dense_feature_list none \ 35 | --specific_style_train 1 \ 36 | --optimizer adam 37 | 38 | -------------------------------------------------------------------------------- /fairseq/tests/speech_recognition/test_cross_entropy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from examples.speech_recognition.criterions.cross_entropy_acc import CrossEntropyWithAccCriterion 8 | from .asr_test_base import CrossEntropyCriterionTestBase 9 | 10 | 11 | class CrossEntropyWithAccCriterionTest(CrossEntropyCriterionTestBase): 12 | def setUp(self): 13 | self.criterion_cls = CrossEntropyWithAccCriterion 14 | super().setUp() 15 | 16 | def test_cross_entropy_all_correct(self): 17 | sample = self.get_test_sample(correct=True, soft_target=False, aggregate=False) 18 | loss, sample_size, logging_output = self.criterion( 19 | self.model, sample, "sum", log_probs=True 20 | ) 21 | assert logging_output["correct"] == 20 22 | assert logging_output["total"] == 20 23 | assert logging_output["sample_size"] == 20 24 | assert logging_output["ntokens"] == 20 25 | 26 | def test_cross_entropy_all_wrong(self): 27 | sample = self.get_test_sample(correct=False, soft_target=False, aggregate=False) 28 | loss, sample_size, logging_output = self.criterion( 29 | self.model, sample, "sum", log_probs=True 30 | ) 31 | assert logging_output["correct"] == 0 32 | assert logging_output["total"] == 20 33 | assert logging_output["sample_size"] == 20 34 | assert logging_output["ntokens"] == 20 35 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/base_wrapper_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from torch.utils.data.dataloader import default_collate 7 | 8 | from . import FairseqDataset 9 | 10 | 11 | class BaseWrapperDataset(FairseqDataset): 12 | 13 | def __init__(self, dataset): 14 | super().__init__() 15 | self.dataset = dataset 16 | 17 | def __getitem__(self, index): 18 | return self.dataset[index] 19 | 20 | def __len__(self): 21 | return len(self.dataset) 22 | 23 | def collater(self, samples): 24 | if hasattr(self.dataset, 'collater'): 25 | return self.dataset.collater(samples) 26 | else: 27 | return default_collate(samples) 28 | 29 | @property 30 | def sizes(self): 31 | return self.dataset.sizes 32 | 33 | def num_tokens(self, index): 34 | return self.dataset.num_tokens(index) 35 | 36 | def size(self, index): 37 | return self.dataset.size(index) 38 | 39 | def ordered_indices(self): 40 | return self.dataset.ordered_indices() 41 | 42 | @property 43 | def supports_prefetch(self): 44 | return getattr(self.dataset, 'supports_prefetch', False) 45 | 46 | def prefetch(self, indices): 47 | self.dataset.prefetch(indices) 48 | 49 | def set_epoch(self, epoch): 50 | super().set_epoch(epoch) 51 | if hasattr(self.dataset, 'set_epoch'): 52 | self.dataset.set_epoch(epoch) 53 | -------------------------------------------------------------------------------- /fairseq/fairseq/models/fairseq_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import torch.nn as nn 7 | 8 | 9 | class FairseqEncoder(nn.Module): 10 | """Base class for encoders.""" 11 | 12 | def __init__(self, dictionary): 13 | super().__init__() 14 | self.dictionary = dictionary 15 | 16 | def forward(self, src_tokens, src_lengths=None, **kwargs): 17 | """ 18 | Args: 19 | src_tokens (LongTensor): tokens in the source language of shape 20 | `(batch, src_len)` 21 | src_lengths (LongTensor): lengths of each source sentence of shape 22 | `(batch)` 23 | """ 24 | raise NotImplementedError 25 | 26 | def reorder_encoder_out(self, encoder_out, new_order): 27 | """ 28 | Reorder encoder output according to `new_order`. 29 | 30 | Args: 31 | encoder_out: output from the ``forward()`` method 32 | new_order (LongTensor): desired order 33 | 34 | Returns: 35 | `encoder_out` rearranged according to `new_order` 36 | """ 37 | raise NotImplementedError 38 | 39 | def max_positions(self): 40 | """Maximum input length supported by the encoder.""" 41 | return 1e6 # an arbitrary large number 42 | 43 | def upgrade_state_dict(self, state_dict): 44 | """Upgrade a (possibly old) state dict for new versions of fairseq.""" 45 | return state_dict 46 | -------------------------------------------------------------------------------- /style_paraphrase/examples/run_evaluate_paraphrase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=eval_gpt2_0 3 | #SBATCH -o style_paraphrase/logs/log_eval_0.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=1080ti-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=15 8 | #SBATCH --mem=300GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2-large model for paraphrasing. 12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/paranmt_filtered, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2-large, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = original, save_steps = 500, save_total_limit = -1, specific_style_train = -1, stop_token = eos 13 | 14 | export DATA_DIR=datasets/paranmt_filtered 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 | --output_dir=$BASE_DIR/saved_models/test_paraphrase \ 20 | --model_type=gpt2 \ 21 | --model_name_or_path=gpt2-large \ 22 | --data_dir=$DATA_DIR \ 23 | --do_eval \ 24 | --do_delete_old \ 25 | --save_steps 1000 \ 26 | --logging_steps 1000 \ 27 | --save_total_limit 3 \ 28 | --evaluate_during_training \ 29 | --num_train_epochs 3 \ 30 | --gradient_accumulation_steps 2 \ 31 | --per_gpu_train_batch_size 5 \ 32 | --limit_examples 1000 \ 33 | --job_id paraphraser_test \ 34 | --learning_rate 5e-5 \ 35 | --prefix_input_type original \ 36 | --global_dense_feature_list none \ 37 | --specific_style_train -1 38 | 39 | -------------------------------------------------------------------------------- /style_paraphrase/examples/shakespeare/run_finetune_shakespeare_0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_shakespeare_0 3 | #SBATCH -o style_paraphrase/logs/log_shakespeare_0.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2-large model for shakespeare. 
12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/shakespeare, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2-large, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = paraphrase_250, save_steps = 500, save_total_limit = -1, specific_style_train = 0, stop_token = eos 13 | 14 | export DATA_DIR=datasets/shakespeare 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 | --output_dir=$BASE_DIR/saved_models/model_shakespeare_0 \ 20 | --model_type=gpt2 \ 21 | --model_name_or_path=gpt2-large \ 22 | --do_train \ 23 | --data_dir=$DATA_DIR \ 24 | --save_steps 500 \ 25 | --logging_steps 20 \ 26 | --save_total_limit -1 \ 27 | --evaluate_during_training \ 28 | --num_train_epochs 3 \ 29 | --gradient_accumulation_steps 2 \ 30 | --per_gpu_train_batch_size 5 \ 31 | --job_id shakespeare_0 \ 32 | --learning_rate 5e-5 \ 33 | --prefix_input_type paraphrase_250 \ 34 | --global_dense_feature_list none \ 35 | --specific_style_train 0 \ 36 | --optimizer adam 37 | 38 | -------------------------------------------------------------------------------- /style_paraphrase/examples/shakespeare/run_finetune_shakespeare_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_shakespeare_1 3 | #SBATCH -o style_paraphrase/logs/log_shakespeare_1.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2-large model for shakespeare. 12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/shakespeare, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2-large, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = paraphrase_250, save_steps = 500, save_total_limit = -1, specific_style_train = 1, stop_token = eos 13 | 14 | export DATA_DIR=datasets/shakespeare 15 | 16 | BASE_DIR=style_paraphrase 17 | 18 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 19 | --output_dir=$BASE_DIR/saved_models/model_shakespeare_1 \ 20 | --model_type=gpt2 \ 21 | --model_name_or_path=gpt2-large \ 22 | --do_train \ 23 | --data_dir=$DATA_DIR \ 24 | --save_steps 500 \ 25 | --logging_steps 20 \ 26 | --save_total_limit -1 \ 27 | --evaluate_during_training \ 28 | --num_train_epochs 3 \ 29 | --gradient_accumulation_steps 2 \ 30 | --per_gpu_train_batch_size 5 \ 31 | --job_id shakespeare_1 \ 32 | --learning_rate 5e-5 \ 33 | --prefix_input_type paraphrase_250 \ 34 | --global_dense_feature_list none \ 35 | --specific_style_train 1 \ 36 | --optimizer adam 37 | 38 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/concat_sentences_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . 
import FairseqDataset 9 | 10 | 11 | class ConcatSentencesDataset(FairseqDataset): 12 | 13 | def __init__(self, *datasets): 14 | super().__init__() 15 | self.datasets = datasets 16 | assert all(len(ds) == len(datasets[0]) for ds in datasets), \ 17 | 'datasets must have the same length' 18 | 19 | def __getitem__(self, index): 20 | return torch.cat([ds[index] for ds in self.datasets]) 21 | 22 | def __len__(self): 23 | return len(self.datasets[0]) 24 | 25 | def collater(self, samples): 26 | return self.datasets[0].collater(samples) 27 | 28 | @property 29 | def sizes(self): 30 | return sum(ds.sizes for ds in self.datasets) 31 | 32 | def num_tokens(self, index): 33 | return sum(ds.num_tokens(index) for ds in self.datasets) 34 | 35 | def size(self, index): 36 | return sum(ds.size(index) for ds in self.datasets) 37 | 38 | def ordered_indices(self): 39 | return self.datasets[0].ordered_indices() 40 | 41 | @property 42 | def supports_prefetch(self): 43 | return any( 44 | getattr(ds, 'supports_prefetch', False) for ds in self.datasets 45 | ) 46 | 47 | def prefetch(self, indices): 48 | for ds in self.datasets: 49 | if getattr(ds, 'supports_prefetch', False): 50 | ds.prefetch(indices) 51 | -------------------------------------------------------------------------------- /style_paraphrase/examples/run_finetune_paraphrase.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name=finetune_gpt2_test_paraphrase 3 | #SBATCH -o style_paraphrase/logs/log_test_paraphrase.txt 4 | #SBATCH --time=167:00:00 5 | #SBATCH --partition=m40-long 6 | #SBATCH --gres=gpu:1 7 | #SBATCH --cpus-per-task=3 8 | #SBATCH --mem=50GB 9 | #SBATCH -d singleton 10 | 11 | # Experiment Details :- GPT2-large model for paraphrasing. 12 | # Run Details :- accumulation = 2, batch_size = 5, beam_size = 1, cpus = 3, dataset = datasets/paranmt_filtered, eval_batch_size = 1, global_dense_feature_list = none, gpu = m40, learning_rate = 5e-5, memory = 50, model_name = gpt2-large, ngpus = 1, num_epochs = 3, optimizer = adam, prefix_input_type = original, save_steps = 500, save_total_limit = -1, specific_style_train = -1, stop_token = eos 13 | 14 | export DATA_DIR=datasets/paranmt_filtered 15 | 16 | source style-venv/bin/activate 17 | 18 | BASE_DIR=style_paraphrase 19 | 20 | python -m torch.distributed.launch --nproc_per_node=1 $BASE_DIR/run_lm_finetuning.py \ 21 | --output_dir=$BASE_DIR/saved_models/test_paraphrase \ 22 | --model_type=gpt2 \ 23 | --model_name_or_path=gpt2-large \ 24 | --data_dir=$DATA_DIR \ 25 | --do_train \ 26 | --save_steps 500 \ 27 | --logging_steps 20 \ 28 | --save_total_limit -1 \ 29 | --evaluate_during_training \ 30 | --num_train_epochs 3 \ 31 | --gradient_accumulation_steps 2 \ 32 | --per_gpu_train_batch_size 5 \ 33 | --job_id paraphraser_test \ 34 | --learning_rate 5e-5 \ 35 | --prefix_input_type original \ 36 | --global_dense_feature_list none \ 37 | --specific_style_train -1 \ 38 | --optimizer adam 39 | 40 | -------------------------------------------------------------------------------- /fairseq/docs/tasks.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | .. module:: fairseq.tasks 5 | 6 | .. _Tasks: 7 | 8 | Tasks 9 | ===== 10 | 11 | Tasks store dictionaries and provide helpers for loading/iterating over 12 | Datasets, initializing the Model/Criterion and calculating the loss. 13 | 14 | Tasks can be selected via the ``--task`` command-line argument. 
Once selected, a 15 | task may expose additional command-line arguments for further configuration. 16 | 17 | Example usage:: 18 | 19 | # setup the task (e.g., load dictionaries) 20 | task = fairseq.tasks.setup_task(args) 21 | 22 | # build model and criterion 23 | model = task.build_model(args) 24 | criterion = task.build_criterion(args) 25 | 26 | # load datasets 27 | task.load_dataset('train') 28 | task.load_dataset('valid') 29 | 30 | # iterate over mini-batches of data 31 | batch_itr = task.get_batch_iterator( 32 | task.dataset('train'), max_tokens=4096, 33 | ) 34 | for batch in batch_itr: 35 | # compute the loss 36 | loss, sample_size, logging_output = task.get_loss( 37 | model, criterion, batch, 38 | ) 39 | loss.backward() 40 | 41 | 42 | Translation 43 | ----------- 44 | 45 | .. autoclass:: fairseq.tasks.translation.TranslationTask 46 | 47 | .. _language modeling: 48 | 49 | Language Modeling 50 | ----------------- 51 | 52 | .. autoclass:: fairseq.tasks.language_modeling.LanguageModelingTask 53 | 54 | 55 | Adding new tasks 56 | ---------------- 57 | 58 | .. autofunction:: fairseq.tasks.register_task 59 | .. autoclass:: fairseq.tasks.FairseqTask 60 | :members: 61 | :undoc-members: 62 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #define SHFL_MASK 0xffffffff 27 | 28 | template 29 | __global__ 30 | void dynamicconv_forward_kernel(const scalar_t* input, 31 | const scalar_t* weight, 32 | int minibatch, 33 | int sequenceLength, 34 | int numFeatures, 35 | int numFiltersInBlock, 36 | int numHeads, 37 | scalar_t* output); 38 | 39 | template 40 | __global__ 41 | void dynamicconv_backward_kernel( 42 | const scalar_t* gradOutput, // B * C * T 43 | const scalar_t* input, // B * C * T 44 | const scalar_t* weight, 45 | int minibatch, 46 | int sequenceLength, 47 | int numFeatures, 48 | int numFiltersInBlock, 49 | int numHeads, 50 | scalar_t* gradWeight, 51 | scalar_t* gradInput); // B * H * k * T 52 | -------------------------------------------------------------------------------- /transformers/docs/source/notebooks.rst: -------------------------------------------------------------------------------- 1 | Notebooks 2 | ================================================ 3 | 4 | We include `three Jupyter Notebooks `_ that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model. 5 | 6 | 7 | * 8 | The first NoteBook (\ `Comparing-TF-and-PT-models.ipynb `_\ ) extracts the hidden states of a full sequence on each layers of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden state of the models. 
9 | 10 | * 11 | The second NoteBook (\ `Comparing-TF-and-PT-models-SQuAD.ipynb `_\ ) compares the loss computed by the TensorFlow and the PyTorch models for identical initialization of the fine-tuning layer of the ``BertForQuestionAnswering`` and computes the standard deviation between them. In the given example, we get a standard deviation of 2.5e-7 between the models. 12 | 13 | * 14 | The third NoteBook (\ `Comparing-TF-and-PT-models-MLM-NSP.ipynb `_\ ) compares the predictions computed by the TensorFlow and the PyTorch models for masked token language modeling using the pre-trained masked language modeling model. 15 | 16 | Please follow the instructions given in the notebooks to run and modify them. 17 | -------------------------------------------------------------------------------- /README-multilingual.md: -------------------------------------------------------------------------------- 1 | We are releasing multilingual formality classifiers by fine-tuning large multilingual language models on English GYAFC, to facilitate zero-shot cross-lingual transfer. We evaluated these classifiers on [XFORMAL](https://arxiv.org/abs/2104.04108). 2 | 3 | For each language, we **lower-case sentences and remove trailing punctuation** to stop the model from latching onto easy indicators of formality. 4 | 5 | Our results seem to indicate that XLM MADX is the best model, followed by XLM. 6 | 7 | MAD-X and XLM-R base classifier checkpoints --- [link](https://drive.google.com/drive/folders/1EUYKeFslhSb_po6jwb7Pqkny5_zNsct6?usp=sharing) 8 | 9 | The MAD-X adapter is also available on AdapterHub [here](https://adapterhub.ml/adapters/martiansideofthemoon/xlm-roberta-base_formality_classify_gyafc_pfeiffer/). 10 | 11 | **Italian** 12 | 13 | | Model | relative Accuracy | absolute Accuracy | 14 | |----------|-------------------|-------------------| 15 | | mBERT | 87.9 | 72.7 | 16 | | XLM | 88.1 | 75.0 | 17 | | XLM MADX | 92.5 | 78.8 | 18 | 19 | **French** 20 | 21 | | Model | relative Accuracy | absolute Accuracy | 22 | |------------|-------------------|-------------------| 23 | | mBERT | 87.7 | 72.4 | 24 | | mBERT MADX | 88.0 | 63.5 | 25 | | XLM | 90.3 | 78.9 | 26 | 27 | **Brazilian Portuguese** 28 | 29 | | Model | relative Accuracy | absolute Accuracy | 30 | |-------|-------------------|-------------------| 31 | | mBERT | 86.4 | 72.6 | 32 | | XLM | 89.3 | 78.1 | 33 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include <torch/extension.h> 9 | #include <vector> 10 | 11 | std::vector<at::Tensor> lightconv_cuda_forward( 12 | at::Tensor input, 13 | at::Tensor filters, 14 | int padding_l); 15 | 16 | std::vector<at::Tensor> lightconv_cuda_backward( 17 | at::Tensor gradOutput, 18 | int padding_l, 19 | at::Tensor input, 20 | at::Tensor filters); 21 | 22 | 23 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 24 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 25 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 26 | 27 | std::vector<at::Tensor> lightconv_forward( 28 | at::Tensor input, 29 | at::Tensor filters, 30 | int padding_l) { 31 | 32 | CHECK_INPUT(input); 33 | CHECK_INPUT(filters); 34 | 35 | return lightconv_cuda_forward(input, filters, padding_l); 36 | } 37 | 38 | std::vector<at::Tensor> lightconv_backward( 39 | at::Tensor gradOutput, 40 | int padding_l, 41 | at::Tensor input, 42 | at::Tensor filters) { 43 | 44 | CHECK_INPUT(gradOutput); 45 | CHECK_INPUT(input); 46 | CHECK_INPUT(filters); 47 | 48 | return lightconv_cuda_backward(gradOutput, padding_l, input, filters); 49 | } 50 | 51 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 52 | m.def("forward", &lightconv_forward, "lightconv forward (CUDA)"); 53 | m.def("backward", &lightconv_backward, "lightconv backward (CUDA)"); 54 | } 55 | -------------------------------------------------------------------------------- /fairseq/examples/backtranslation/README.md: -------------------------------------------------------------------------------- 1 | # Understanding Back-Translation at Scale (Edunov et al., 2018) 2 | 3 | This page includes pre-trained models from the paper [Understanding Back-Translation at Scale (Edunov et al., 2018)](https://arxiv.org/abs/1808.09381). 4 | 5 | ## Pre-trained models 6 | 7 | Model | Description | Dataset | Download 8 | ---|---|---|--- 9 | `transformer.wmt18.en-de` | Transformer ([Edunov et al., 2018](https://arxiv.org/abs/1808.09381)) WMT'18 winner | [WMT'18 English-German](http://www.statmt.org/wmt18/translation-task.html) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt18.en-de.ensemble.tar.gz) See NOTE in the archive 10 | 11 | ## Example usage 12 | 13 | Interactive generation from the full ensemble via PyTorch Hub: 14 | ```python 15 | import torch 16 | 17 | # List available models 18 | torch.hub.list('pytorch/fairseq') # [..., 'transformer.wmt18.en-de', ... ] 19 | 20 | # Load the WMT'18 En-De ensemble 21 | en2de_ensemble = torch.hub.load( 22 | 'pytorch/fairseq', 'transformer.wmt18.en-de', 23 | checkpoint_file='wmt18.model1.pt:wmt18.model2.pt:wmt18.model3.pt:wmt18.model4.pt:wmt18.model5.pt', 24 | tokenizer='moses', bpe='subword_nmt') 25 | 26 | # The ensemble contains 5 models 27 | len(en2de_ensemble.models) 28 | # 5 29 | 30 | # Translate 31 | en2de_ensemble.translate('Hello world!') 32 | # 'Hallo Welt!' 33 | ``` 34 | 35 | ## Citation 36 | ```bibtex 37 | @inproceedings{edunov2018backtranslation, 38 | title = {Understanding Back-Translation at Scale}, 39 | author = {Edunov, Sergey and Ott, Myle and Auli, Michael and Grangier, David}, 40 | booktitle = {Conference of the Association for Computational Linguistics (ACL)}, 41 | year = 2018, 42 | } 43 | ``` 44 | -------------------------------------------------------------------------------- /web-demo/strap-frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 22 | React App 23 | 24 | 25 | You need to enable JavaScript to run this app. 
26 | 27 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/gpt2_bpe.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq import file_utils 7 | from fairseq.data.encoders import register_bpe 8 | 9 | from .gpt2_bpe_utils import get_encoder 10 | 11 | 12 | DEFAULT_ENCODER_JSON = 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json' 13 | DEFAULT_VOCAB_BPE = 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe' 14 | 15 | 16 | @register_bpe('gpt2') 17 | class GPT2BPE(object): 18 | 19 | @staticmethod 20 | def add_args(parser): 21 | # fmt: off 22 | parser.add_argument('--gpt2-encoder-json', type=str, 23 | default=DEFAULT_ENCODER_JSON, 24 | help='path to encoder.json') 25 | parser.add_argument('--gpt2-vocab-bpe', type=str, 26 | default=DEFAULT_VOCAB_BPE, 27 | help='path to vocab.bpe') 28 | # fmt: on 29 | 30 | def __init__(self, args): 31 | encoder_json = file_utils.cached_path( 32 | getattr(args, 'gpt2_encoder_json', DEFAULT_ENCODER_JSON) 33 | ) 34 | vocab_bpe = file_utils.cached_path( 35 | getattr(args, 'gpt2_vocab_bpe', DEFAULT_VOCAB_BPE) 36 | ) 37 | self.bpe = get_encoder(encoder_json, vocab_bpe) 38 | 39 | def encode(self, x: str) -> str: 40 | return ' '.join(map(str, self.bpe.encode(x))) 41 | 42 | def decode(self, x: str) -> str: 43 | return self.bpe.decode(map(int, x.split())) 44 | 45 | def is_beginning_of_word(self, x: str) -> bool: 46 | return self.decode(x).startswith(' ') 47 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include <torch/extension.h> 9 | #include <vector> 10 | 11 | std::vector<at::Tensor> dynamicconv_cuda_forward( 12 | at::Tensor input, 13 | at::Tensor filters, 14 | int padding_l); 15 | 16 | std::vector<at::Tensor> dynamicconv_cuda_backward( 17 | at::Tensor gradOutput, 18 | int padding_l, 19 | at::Tensor input, 20 | at::Tensor filters); 21 | 22 | 23 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 24 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 25 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 26 | 27 | std::vector<at::Tensor> dynamicconv_forward( 28 | at::Tensor input, 29 | at::Tensor filters, 30 | int padding_l) { 31 | 32 | CHECK_INPUT(input); 33 | CHECK_INPUT(filters); 34 | 35 | return dynamicconv_cuda_forward(input, filters, 36 | padding_l); 37 | } 38 | 39 | std::vector<at::Tensor> dynamicconv_backward( 40 | at::Tensor gradOutput, 41 | int padding_l, 42 | at::Tensor input, 43 | at::Tensor filters) { 44 | 45 | CHECK_INPUT(gradOutput); 46 | CHECK_INPUT(input); 47 | CHECK_INPUT(filters); 48 | 49 | return dynamicconv_cuda_backward(gradOutput, padding_l, 50 | input, filters); 51 | } 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | m.def("forward", &dynamicconv_forward, "dynamicconv forward (CUDA)"); 55 | m.def("backward", &dynamicconv_backward, "dynamicconv backward (CUDA)"); 56 | } 57 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/xlm.rst: -------------------------------------------------------------------------------- 1 | XLM 2 | ---------------------------------------------------- 3 | 4 | ``XLMConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.XLMConfig 8 | :members: 9 | 10 | ``XLMTokenizer`` 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: transformers.XLMTokenizer 14 | :members: 15 | 16 | ``XLMModel`` 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: transformers.XLMModel 20 | :members: 21 | 22 | 23 | ``XLMWithLMHeadModel`` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: transformers.XLMWithLMHeadModel 27 | :members: 28 | 29 | 30 | ``XLMForSequenceClassification`` 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | .. autoclass:: transformers.XLMForSequenceClassification 34 | :members: 35 | 36 | 37 | ``XLMForQuestionAnswering`` 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: transformers.XLMForQuestionAnswering 41 | :members: 42 | 43 | 44 | ``TFXLMModel`` 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. autoclass:: transformers.TFXLMModel 48 | :members: 49 | 50 | 51 | ``TFXLMWithLMHeadModel`` 52 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | .. autoclass:: transformers.TFXLMWithLMHeadModel 55 | :members: 56 | 57 | 58 | ``TFXLMForSequenceClassification`` 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 60 | 61 | .. autoclass:: transformers.TFXLMForSequenceClassification 62 | :members: 63 | 64 | 65 | ``TFXLMForQuestionAnsweringSimple`` 66 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 67 | 68 | .. 
autoclass:: transformers.TFXLMForQuestionAnsweringSimple 69 | :members: 70 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/distilbert.rst: -------------------------------------------------------------------------------- 1 | DistilBERT 2 | ---------------------------------------------------- 3 | 4 | ``DistilBertConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.DistilBertConfig 8 | :members: 9 | 10 | 11 | ``DistilBertTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.DistilBertTokenizer 15 | :members: 16 | 17 | 18 | ``DistilBertModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.DistilBertModel 22 | :members: 23 | 24 | 25 | ``DistilBertForMaskedLM`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.DistilBertForMaskedLM 29 | :members: 30 | 31 | 32 | ``DistilBertForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.DistilBertForSequenceClassification 36 | :members: 37 | 38 | 39 | ``DistilBertForQuestionAnswering`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.DistilBertForQuestionAnswering 43 | :members: 44 | 45 | ``TFDistilBertModel`` 46 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | .. autoclass:: transformers.TFDistilBertModel 49 | :members: 50 | 51 | 52 | ``TFDistilBertForMaskedLM`` 53 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | 55 | .. autoclass:: transformers.TFDistilBertForMaskedLM 56 | :members: 57 | 58 | 59 | ``TFDistilBertForSequenceClassification`` 60 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 61 | 62 | .. autoclass:: transformers.TFDistilBertForSequenceClassification 63 | :members: 64 | 65 | 66 | ``TFDistilBertForQuestionAnswering`` 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | .. autoclass:: transformers.TFDistilBertForQuestionAnswering 70 | :members: 71 | -------------------------------------------------------------------------------- /fairseq/scripts/shard_docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Split a large file into shards while respecting document boundaries. Documents 8 | should be separated by a single empty line. 
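Example (hypothetical file names): running `python fairseq/scripts/shard_docs.py corpus.txt --num-shards 4` writes corpus.txt.shard0 ... corpus.txt.shard3, assigning whole documents to the shards round-robin.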
9 | """ 10 | 11 | import argparse 12 | import contextlib 13 | 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('input') 18 | parser.add_argument('--num-shards', type=int) 19 | args = parser.parse_args() 20 | 21 | assert args.num_shards is not None and args.num_shards > 1 22 | 23 | with open(args.input, 'r', encoding='utf-8') as h: 24 | with contextlib.ExitStack() as stack: 25 | outputs = [ 26 | stack.enter_context(open(args.input + ".shard" + str(i), "w", encoding="utf-8")) 27 | for i in range(args.num_shards) 28 | ] 29 | 30 | doc = [] 31 | first_doc = [True]*args.num_shards 32 | def output_doc(i): 33 | if not first_doc[i]: 34 | outputs[i].write("\n") 35 | first_doc[i] = False 36 | for line in doc: 37 | outputs[i].write(line) 38 | doc.clear() 39 | 40 | num_docs = 0 41 | for line in h: 42 | if line.strip() == "": # empty line indicates new document 43 | output_doc(num_docs % args.num_shards) 44 | num_docs += 1 45 | else: 46 | doc.append(line) 47 | output_doc(num_docs % args.num_shards) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /transformers/docs/source/model_doc/xlnet.rst: -------------------------------------------------------------------------------- 1 | XLNet 2 | ---------------------------------------------------- 3 | 4 | ``XLNetConfig`` 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | .. autoclass:: transformers.XLNetConfig 8 | :members: 9 | 10 | 11 | ``XLNetTokenizer`` 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: transformers.XLNetTokenizer 15 | :members: 16 | 17 | 18 | ``XLNetModel`` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autoclass:: transformers.XLNetModel 22 | :members: 23 | 24 | 25 | ``XLNetLMHeadModel`` 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | .. autoclass:: transformers.XLNetLMHeadModel 29 | :members: 30 | 31 | 32 | ``XLNetForSequenceClassification`` 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | .. autoclass:: transformers.XLNetForSequenceClassification 36 | :members: 37 | 38 | 39 | ``XLNetForQuestionAnswering`` 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | .. autoclass:: transformers.XLNetForQuestionAnswering 43 | :members: 44 | 45 | 46 | ``TFXLNetModel`` 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. autoclass:: transformers.TFXLNetModel 50 | :members: 51 | 52 | 53 | ``TFXLNetLMHeadModel`` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: transformers.TFXLNetLMHeadModel 57 | :members: 58 | 59 | 60 | ``TFXLNetForSequenceClassification`` 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autoclass:: transformers.TFXLNetForSequenceClassification 64 | :members: 65 | 66 | 67 | ``TFXLNetForQuestionAnsweringSimple`` 68 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | .. autoclass:: transformers.TFXLNetForQuestionAnsweringSimple 71 | :members: 72 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/legacy/masked_lm_dictionary.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from fairseq.data import Dictionary 7 | 8 | 9 | class MaskedLMDictionary(Dictionary): 10 | """ 11 | Dictionary for Masked Language Modelling tasks. This extends Dictionary by 12 | adding the mask symbol. 13 | """ 14 | def __init__( 15 | self, 16 | pad='<pad>', 17 | eos='</s>', 18 | unk='<unk>', 19 | mask='<mask>', 20 | ): 21 | super().__init__(pad, eos, unk) 22 | self.mask_word = mask 23 | self.mask_index = self.add_symbol(mask) 24 | self.nspecial = len(self.symbols) 25 | 26 | def mask(self): 27 | """Helper to get index of mask symbol""" 28 | return self.mask_index 29 | 30 | 31 | class BertDictionary(MaskedLMDictionary): 32 | """ 33 | Dictionary for BERT task. This extends MaskedLMDictionary by adding support 34 | for cls and sep symbols. 35 | """ 36 | def __init__( 37 | self, 38 | pad='<pad>', 39 | eos='</s>', 40 | unk='<unk>', 41 | mask='<mask>', 42 | cls='<cls>', 43 | sep='<sep>' 44 | ): 45 | super().__init__(pad, eos, unk, mask) 46 | self.cls_word = cls 47 | self.sep_word = sep 48 | self.cls_index = self.add_symbol(cls) 49 | self.sep_index = self.add_symbol(sep) 50 | self.nspecial = len(self.symbols) 51 | 52 | def cls(self): 53 | """Helper to get index of cls symbol""" 54 | return self.cls_index 55 | 56 | def sep(self): 57 | """Helper to get index of sep symbol""" 58 | return self.sep_index 59 | -------------------------------------------------------------------------------- /fairseq/fairseq/data/encoders/subword_nmt_bpe.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq import file_utils 7 | from fairseq.data.encoders import register_bpe 8 | 9 | 10 | @register_bpe('subword_nmt') 11 | class SubwordNMTBPE(object): 12 | 13 | @staticmethod 14 | def add_args(parser): 15 | # fmt: off 16 | parser.add_argument('--bpe-codes', type=str, 17 | help='path to subword NMT BPE') 18 | parser.add_argument('--bpe-separator', default='@@', 19 | help='BPE separator') 20 | # fmt: on 21 | 22 | def __init__(self, args): 23 | if args.bpe_codes is None: 24 | raise ValueError('--bpe-codes is required for --bpe=subword_nmt') 25 | codes = file_utils.cached_path(args.bpe_codes) 26 | try: 27 | from subword_nmt import apply_bpe 28 | bpe_parser = apply_bpe.create_parser() 29 | bpe_args = bpe_parser.parse_args([ 30 | '--codes', codes, 31 | '--separator', args.bpe_separator, 32 | ]) 33 | self.bpe = apply_bpe.BPE( 34 | bpe_args.codes, 35 | bpe_args.merges, 36 | bpe_args.separator, 37 | None, 38 | bpe_args.glossaries, 39 | ) 40 | self.bpe_symbol = bpe_args.separator + ' ' 41 | except ImportError: 42 | raise ImportError('Please install subword_nmt with: pip install subword-nmt') 43 | 44 | def encode(self, x: str) -> str: 45 | return self.bpe.process_line(x) 46 | 47 | def decode(self, x: str) -> str: 48 | return (x + ' ').replace(self.bpe_symbol, '').rstrip() 49 | -------------------------------------------------------------------------------- /fairseq/fairseq/modules/sparse_transformer_sentence_encoder_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from fairseq.modules import TransformerSentenceEncoderLayer 7 | from fairseq.modules.sparse_multihead_attention import SparseMultiheadAttention 8 | 9 | 10 | class SparseTransformerSentenceEncoderLayer(TransformerSentenceEncoderLayer): 11 | """ 12 | Implements a Sparse Transformer Encoder Layer (see SparseMultiheadAttention) 13 | """ 14 | 15 | def __init__( 16 | self, 17 | embedding_dim: float = 768, 18 | ffn_embedding_dim: float = 3072, 19 | num_attention_heads: float = 8, 20 | dropout: float = 0.1, 21 | attention_dropout: float = 0.1, 22 | activation_dropout: float = 0.1, 23 | activation_fn: str = 'relu', 24 | add_bias_kv: bool = False, 25 | add_zero_attn: bool = False, 26 | export: bool = False, 27 | is_bidirectional: bool = True, 28 | stride: int = 32, 29 | expressivity: int = 8, 30 | ) -> None: 31 | 32 | super().__init__( 33 | embedding_dim, ffn_embedding_dim, num_attention_heads, dropout, 34 | attention_dropout, activation_dropout, activation_fn, add_bias_kv, 35 | add_zero_attn, export 36 | ) 37 | 38 | self.self_attn = SparseMultiheadAttention( 39 | self.embedding_dim, 40 | num_attention_heads, 41 | dropout=attention_dropout, 42 | add_bias_kv=add_bias_kv, 43 | add_zero_attn=add_zero_attn, 44 | self_attention=True, 45 | is_bidirectional=is_bidirectional, 46 | stride=stride, 47 | expressivity=expressivity, 48 | ) 49 | -------------------------------------------------------------------------------- /fairseq/tests/test_character_token_embedder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import unittest 8 | 9 | from fairseq.data import Dictionary 10 | from fairseq.modules import CharacterTokenEmbedder 11 | 12 | 13 | class TestCharacterTokenEmbedder(unittest.TestCase): 14 | def test_character_token_embedder(self): 15 | vocab = Dictionary() 16 | vocab.add_symbol('hello') 17 | vocab.add_symbol('there') 18 | 19 | embedder = CharacterTokenEmbedder(vocab, [(2, 16), (4, 32), (8, 64), (16, 2)], 64, 5, 2) 20 | 21 | test_sents = [['hello', 'unk', 'there'], ['there'], ['hello', 'there']] 22 | max_len = max(len(s) for s in test_sents) 23 | input = torch.LongTensor(len(test_sents), max_len + 2).fill_(vocab.pad()) 24 | for i in range(len(test_sents)): 25 | input[i][0] = vocab.eos() 26 | for j in range(len(test_sents[i])): 27 | input[i][j + 1] = vocab.index(test_sents[i][j]) 28 | input[i][j + 2] = vocab.eos() 29 | embs = embedder(input) 30 | 31 | assert embs.size() == (len(test_sents), max_len + 2, 5) 32 | self.assertAlmostEqual(embs[0][0], embs[1][0]) 33 | self.assertAlmostEqual(embs[0][0], embs[0][-1]) 34 | self.assertAlmostEqual(embs[0][1], embs[2][1]) 35 | self.assertAlmostEqual(embs[0][3], embs[1][1]) 36 | 37 | embs.sum().backward() 38 | assert embedder.char_embeddings.weight.grad is not None 39 | 40 | def assertAlmostEqual(self, t1, t2): 41 | self.assertEqual(t1.size(), t2.size(), "size mismatch") 42 | self.assertLess((t1 - t2).abs().max(), 1e-6) 43 | 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /fairseq/tests/test_convtbc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import unittest 8 | from fairseq.modules import ConvTBC 9 | import torch.nn as nn 10 | 11 | 12 | class TestConvTBC(unittest.TestCase): 13 | 14 | def test_convtbc(self): 15 | # ksz, in_channels, out_channels 16 | conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1) 17 | # out_channels, in_channels, ksz 18 | conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1) 19 | 20 | conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2)) 21 | conv_tbc.bias.data.copy_(conv1d.bias.data) 22 | 23 | input_tbc = torch.randn(7, 2, 4, requires_grad=True) 24 | input1d = input_tbc.data.transpose(0, 1).transpose(1, 2) 25 | input1d.requires_grad = True 26 | 27 | output_tbc = conv_tbc(input_tbc) 28 | output1d = conv1d(input1d) 29 | 30 | self.assertAlmostEqual(output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data) 31 | 32 | grad_tbc = torch.randn(output_tbc.size()) 33 | grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous() 34 | 35 | output_tbc.backward(grad_tbc) 36 | output1d.backward(grad1d) 37 | 38 | self.assertAlmostEqual(conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data) 39 | self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data) 40 | self.assertAlmostEqual(input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data) 41 | 42 | def assertAlmostEqual(self, t1, t2): 43 | self.assertEqual(t1.size(), t2.size(), "size mismatch") 44 | self.assertLess((t1 - t2).abs().max(), 1e-4) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /fairseq/fairseq/models/model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | 9 | def skip_tensors(x, mask): 10 | """ 11 | Getting sliced (dim=0) tensor by mask. Supporting tensor and list/dict of tensors. 12 | """ 13 | if isinstance(x, int): 14 | return x 15 | 16 | if x is None: 17 | return None 18 | 19 | if isinstance(x, torch.Tensor): 20 | if x.size(0) == mask.size(0): 21 | return x[mask] 22 | elif x.size(1) == mask.size(0): 23 | return x[:, mask] 24 | 25 | if isinstance(x, list): 26 | return [skip_tensors(x_i, mask) for x_i in x] 27 | 28 | if isinstance(x, dict): 29 | return {k: skip_tensors(v, mask) for k, v in x.items()} 30 | 31 | raise NotImplementedError 32 | 33 | 34 | def fill_tensors(x, mask, y, padding_idx): 35 | """ 36 | Filling tensor x with y at masked positions (dim=0). 
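    For example (shapes are illustrative): with x of shape (4, 5), a boolean mask of shape (4,) where mask.sum() == 2, and y of shape (2, 7), the two masked rows of x are overwritten by y; because y is wider, x is first right-padded to width 7 with padding_idx.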
37 | """ 38 | if x is None: 39 | return y 40 | assert x.dim() == y.dim() and mask.size(0) == x.size(0) 41 | assert x.dim() == 2 or (x.dim() == 3 and x.size(2) == y.size(2)) 42 | n_selected = mask.sum() 43 | assert n_selected == y.size(0) 44 | 45 | if n_selected == x.size(0): 46 | return y 47 | 48 | if x.size(1) < y.size(1): 49 | dims = [x.size(0), y.size(1) - x.size(1)] 50 | if x.dim() == 3: 51 | dims.append(x.size(2)) 52 | x = torch.cat([x, x.new_zeros(*dims).fill_(padding_idx)], 1) 53 | x[mask] = y 54 | elif x.size(1) > y.size(1): 55 | x[mask] = padding_idx 56 | if x.dim() == 2: 57 | x[mask, :y.size(1)] = y 58 | else: 59 | x[mask, :y.size(1), :] = y 60 | else: 61 | x[mask] = y 62 | return x 63 | -------------------------------------------------------------------------------- /fairseq/fairseq/optim/adadelta.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.optim 7 | 8 | from . import FairseqOptimizer, register_optimizer 9 | 10 | 11 | @register_optimizer('adadelta') 12 | class Adadelta(FairseqOptimizer): 13 | def __init__(self, args, params): 14 | super().__init__(args) 15 | self._optimizer = torch.optim.Adadelta(params, **self.optimizer_config) 16 | 17 | @staticmethod 18 | def add_args(parser): 19 | """Add optimizer-specific arguments to the parser.""" 20 | # fmt: off 21 | parser.add_argument('--adadelta-rho', type=float, default=0.9, metavar='RHO', 22 | help='coefficient used for computing a running average of squared gradients') 23 | parser.add_argument('--adadelta-eps', type=float, default=1e-6, metavar='EPS', 24 | help='term added to the denominator to improve numerical stability') 25 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 26 | help='weight decay') 27 | parser.add_argument('--anneal-eps', action='store_true', help='flag to anneal eps') 28 | # fmt: on 29 | 30 | @property 31 | def optimizer_config(self): 32 | """ 33 | Return a kwarg dictionary that will be used to override optimizer 34 | args stored in checkpoints. This allows us to load a checkpoint and 35 | resume training using a different set of optimizer args, e.g., with a 36 | different learning rate. 37 | """ 38 | return { 39 | 'lr': self.args.lr[0], 40 | 'rho': self.args.adadelta_rho, 41 | 'eps': self.args.adadelta_eps, 42 | 'weight_decay': self.args.weight_decay, 43 | } 44 | -------------------------------------------------------------------------------- /style_paraphrase/style_classify/webapp/templates/visual.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | {{ comparison|safe }} 61 | 62 | 63 | 64 | 65 | --------------------------------------------------------------------------------