├── .github
    ├── ISSUE_TEMPLATE.md
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── documentation.md
    │   ├── feature_request.md
    │   └── how-to-question.md
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   └── build.yml
├── .gitignore
├── .gitmodules
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── config
    ├── config.yaml
    ├── config_eval_lm.yaml
    ├── criterion
    │   ├── adaptive_loss.yaml
    │   └── cross_entropy.yaml
    ├── lr_scheduler
    │   ├── cosine.yaml
    │   └── inverse_sqrt.yaml
    ├── model
    │   ├── transformer_lm.yaml
    │   ├── transformer_lm_baevski_gbw.yaml
    │   ├── transformer_lm_baevski_wiki103.yaml
    │   ├── transformer_lm_big.yaml
    │   ├── transformer_lm_gbw.yaml
    │   ├── transformer_lm_gpt.yaml
    │   ├── transformer_lm_gpt2_big.yaml
    │   ├── transformer_lm_gpt2_medium.yaml
    │   ├── transformer_lm_gpt2_small.yaml
    │   └── transformer_lm_wiki103.yaml
    ├── optimizer
    │   ├── adam.yaml
    │   └── nag.yaml
    ├── params
    │   ├── eval_lm_params.yaml
    │   └── training_params.yaml
    └── task
    │   └── language_modeling.yaml
├── docs
    ├── Makefile
    ├── _static
    │   └── theme_overrides.css
    ├── command_line_tools.rst
    ├── conf.py
    ├── criterions.rst
    ├── data.rst
    ├── docutils.conf
    ├── fairseq.gif
    ├── fairseq_logo.png
    ├── getting_started.rst
    ├── hydra_integration.md
    ├── index.rst
    ├── lr_scheduler.rst
    ├── make.bat
    ├── models.rst
    ├── modules.rst
    ├── optim.rst
    ├── overview.rst
    ├── requirements.txt
    ├── tasks.rst
    ├── tutorial_classifying_names.rst
    └── tutorial_simple_lstm.rst
├── examples
    ├── .gitignore
    ├── __init__.py
    ├── backtranslation
    │   ├── README.md
    │   ├── deduplicate_lines.py
    │   ├── extract_bt_data.py
    │   ├── prepare-de-monolingual.sh
    │   ├── prepare-wmt18en2de.sh
    │   ├── sacrebleu.sh
    │   └── tokenized_bleu.sh
    ├── bart
    │   ├── README.glue.md
    │   ├── README.md
    │   └── README.summarization.md
    ├── byte_level_bpe
    │   ├── README.md
    │   ├── get_bitext.py
    │   ├── get_data.sh
    │   └── gru_transformer.py
    ├── camembert
    │   └── README.md
    ├── constrained_decoding
    │   ├── README.md
    │   ├── normalize.py
    │   └── tok.py
    ├── conv_seq2seq
    │   └── README.md
    ├── cross_lingual_language_model
    │   └── README.md
    ├── joint_alignment_translation
    │   ├── README.md
    │   └── prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh
    ├── language_model
    │   ├── README.adaptive_inputs.md
    │   ├── README.conv.md
    │   ├── README.md
    │   └── prepare-wikitext-103.sh
    ├── layerdrop
    │   └── README.md
    ├── mbart
    │   └── README.md
    ├── megatron_11b
    │   ├── README.md
    │   └── detok.py
    ├── multilingual
    │   ├── README.md
    │   ├── finetune_multilingual_model.sh
    │   ├── multilingual_fairseq_gen.sh
    │   └── train_multilingual_model.sh
    ├── noisychannel
    │   ├── README.md
    │   ├── __init__.py
    │   ├── rerank.py
    │   ├── rerank_generate.py
    │   ├── rerank_options.py
    │   ├── rerank_score_bw.py
    │   ├── rerank_score_lm.py
    │   ├── rerank_tune.py
    │   └── rerank_utils.py
    ├── nonautoregressive_translation
    │   ├── README.md
    │   └── scripts.md
    ├── paraphraser
    │   ├── README.md
    │   └── paraphrase.py
    ├── pay_less_attention_paper
    │   └── README.md
    ├── quant_noise
    │   ├── README.md
    │   └── transformer_quantization_config.yaml
    ├── roberta
    │   ├── README.custom_classification.md
    │   ├── README.glue.md
    │   ├── README.md
    │   ├── README.pretraining.md
    │   ├── README.race.md
    │   ├── commonsense_qa
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── commonsense_qa_task.py
    │   │   └── download_cqa_data.sh
    │   ├── multiprocessing_bpe_encoder.py
    │   ├── preprocess_GLUE_tasks.sh
    │   ├── preprocess_RACE.py
    │   ├── preprocess_RACE.sh
    │   └── wsc
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── wsc_criterion.py
    │   │   ├── wsc_task.py
    │   │   └── wsc_utils.py
    ├── scaling_nmt
    │   └── README.md
    ├── simultaneous_translation
    │   ├── README.md
    │   ├── __init__.py
    │   ├── criterions
    │   │   ├── __init__.py
    │   │   └── label_smoothed_cross_entropy_latency_augmented.py
    │   ├── docs
    │   │   ├── baseline.md
    │   │   └── evaluation.md
    │   ├── eval
    │   │   ├── __init__.py
    │   │   ├── agents
    │   │   │   ├── __init__.py
    │   │   │   ├── agent.py
    │   │   │   ├── simul_trans_agent.py
    │   │   │   ├── simul_trans_text_agent.py
    │   │   │   └── word_splitter.py
    │   │   ├── client.py
    │   │   ├── eval_latency.py
    │   │   ├── evaluate.py
    │   │   ├── scorers
    │   │   │   ├── __init__.py
    │   │   │   ├── scorer.py
    │   │   │   └── text_scorer.py
    │   │   └── server.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   └── transformer_monotonic_attention.py
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── monotonic_multihead_attention.py
    │   │   └── monotonic_transformer_layer.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── functions.py
    │   │   └── latency.py
    ├── speech_recognition
    │   ├── README.md
    │   ├── __init__.py
    │   ├── criterions
    │   │   ├── ASG_loss.py
    │   │   ├── __init__.py
    │   │   └── cross_entropy_acc.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── asr_dataset.py
    │   │   ├── collaters.py
    │   │   ├── data_utils.py
    │   │   └── replabels.py
    │   ├── datasets
    │   │   ├── asr_prep_json.py
    │   │   └── prepare-librispeech.sh
    │   ├── infer.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── vggtransformer.py
    │   │   └── w2l_conv_glu_enc.py
    │   ├── tasks
    │   │   ├── __init__.py
    │   │   └── speech_recognition.py
    │   ├── utils
    │   │   └── wer_utils.py
    │   └── w2l_decoder.py
    ├── stories
    │   └── README.md
    ├── translation
    │   ├── README.md
    │   ├── prepare-iwslt14.sh
    │   ├── prepare-iwslt17-multilingual.sh
    │   ├── prepare-wmt14en2de.sh
    │   └── prepare-wmt14en2fr.sh
    ├── translation_moe
    │   ├── README.md
    │   ├── score.py
    │   └── src
    │   │   ├── __init__.py
    │   │   ├── logsumexp_moe.py
    │   │   ├── mean_pool_gating_network.py
    │   │   └── translation_moe.py
    ├── unsupervised_quality_estimation
    │   ├── README.md
    │   ├── aggregate_scores.py
    │   ├── meteor.py
    │   └── repeat_lines.py
    ├── wav2vec
    │   ├── README.md
    │   ├── libri_labels.py
    │   ├── vq-wav2vec_featurize.py
    │   ├── wav2vec_featurize.py
    │   └── wav2vec_manifest.py
    ├── wmt19
    │   └── README.md
    └── xlmr
    │   └── README.md
├── fairseq
    ├── __init__.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── dummy_lm.py
    │   ├── dummy_masked_lm.py
    │   ├── dummy_model.py
    │   └── dummy_mt.py
    ├── binarizer.py
    ├── checkpoint_utils.py
    ├── clib
    │   ├── libbleu
    │   │   ├── libbleu.cpp
    │   │   └── module.cpp
    │   ├── libnat
    │   │   └── edit_dist.cpp
    │   └── libnat_cuda
    │   │   ├── binding.cpp
    │   │   ├── edit_dist.cu
    │   │   └── edit_dist.h
    ├── criterions
    │   ├── __init__.py
    │   ├── adaptive_loss.py
    │   ├── composite_loss.py
    │   ├── cross_entropy.py
    │   ├── ctc.py
    │   ├── fairseq_criterion.py
    │   ├── label_smoothed_cross_entropy.py
    │   ├── label_smoothed_cross_entropy_split.py
    │   ├── label_smoothed_cross_entropy_with_alignment.py
    │   ├── legacy_masked_lm.py
    │   ├── masked_lm.py
    │   ├── nat_loss.py
    │   ├── sentence_prediction.py
    │   ├── sentence_ranking.py
    │   └── wav2vec_criterion.py
    ├── data
    │   ├── __init__.py
    │   ├── add_target_dataset.py
    │   ├── append_token_dataset.py
    │   ├── audio
    │   │   ├── __init__.py
    │   │   └── raw_audio_dataset.py
    │   ├── backtranslation_dataset.py
    │   ├── base_wrapper_dataset.py
    │   ├── bucket_pad_length_dataset.py
    │   ├── colorize_dataset.py
    │   ├── concat_dataset.py
    │   ├── concat_sentences_dataset.py
    │   ├── data_utils.py
    │   ├── data_utils_fast.pyx
    │   ├── denoising_dataset.py
    │   ├── dictionary.py
    │   ├── encoders
    │   │   ├── __init__.py
    │   │   ├── byte_bpe.py
    │   │   ├── byte_utils.py
    │   │   ├── bytes.py
    │   │   ├── characters.py
    │   │   ├── fastbpe.py
    │   │   ├── gpt2_bpe.py
    │   │   ├── gpt2_bpe_utils.py
    │   │   ├── hf_bert_bpe.py
    │   │   ├── hf_byte_bpe.py
    │   │   ├── moses_tokenizer.py
    │   │   ├── nltk_tokenizer.py
    │   │   ├── sentencepiece_bpe.py
    │   │   ├── space_tokenizer.py
    │   │   ├── subword_nmt_bpe.py
    │   │   └── utils.py
    │   ├── fairseq_dataset.py
    │   ├── fasta_dataset.py
    │   ├── id_dataset.py
    │   ├── indexed_dataset.py
    │   ├── iterators.py
    │   ├── language_pair_dataset.py
    │   ├── legacy
    │   │   ├── __init__.py
    │   │   ├── block_pair_dataset.py
    │   │   ├── masked_lm_dataset.py
    │   │   └── masked_lm_dictionary.py
    │   ├── list_dataset.py
    │   ├── lm_context_window_dataset.py
    │   ├── lru_cache_dataset.py
    │   ├── mask_tokens_dataset.py
    │   ├── monolingual_dataset.py
    │   ├── multi_corpus_dataset.py
    │   ├── multi_corpus_sampled_dataset.py
    │   ├── multilingual
    │   │   ├── __init__.py
    │   │   ├── multilingual_data_manager.py
    │   │   ├── multilingual_utils.py
    │   │   ├── sampled_multi_dataset.py
    │   │   ├── sampled_multi_epoch_dataset.py
    │   │   └── sampling_method.py
    │   ├── nested_dictionary_dataset.py
    │   ├── noising.py
    │   ├── num_samples_dataset.py
    │   ├── numel_dataset.py
    │   ├── offset_tokens_dataset.py
    │   ├── pad_dataset.py
    │   ├── plasma_utils.py
    │   ├── prepend_dataset.py
    │   ├── prepend_token_dataset.py
    │   ├── raw_label_dataset.py
    │   ├── replace_dataset.py
    │   ├── resampling_dataset.py
    │   ├── roll_dataset.py
    │   ├── round_robin_zip_datasets.py
    │   ├── shorten_dataset.py
    │   ├── sort_dataset.py
    │   ├── strip_token_dataset.py
    │   ├── subsample_dataset.py
    │   ├── token_block_dataset.py
    │   ├── token_block_utils_fast.pyx
    │   ├── token_label_block_dataset.py
    │   ├── transform_eos_dataset.py
    │   └── transform_eos_lang_pair_dataset.py
    ├── dataclass
    │   ├── __init__.py
    │   ├── data_class.py
    │   └── utils.py
    ├── distributed_utils.py
    ├── file_io.py
    ├── file_utils.py
    ├── hub_utils.py
    ├── incremental_decoding_utils.py
    ├── iterative_refinement_generator.py
    ├── legacy_distributed_data_parallel.py
    ├── logging
    │   ├── __init__.py
    │   ├── meters.py
    │   ├── metrics.py
    │   └── progress_bar.py
    ├── model_parallel
    │   ├── __init__.py
    │   ├── criterions
    │   │   ├── __init__.py
    │   │   └── vocab_parallel_cross_entropy.py
    │   ├── megatron
    │   │   ├── .gitignore
    │   │   ├── LICENSE
    │   │   ├── README.md
    │   │   ├── arguments.py
    │   │   ├── configure_data.py
    │   │   ├── data_utils
    │   │   │   ├── __init__.py
    │   │   │   ├── corpora.py
    │   │   │   ├── datasets.py
    │   │   │   ├── file_utils.py
    │   │   │   ├── lazy_loader.py
    │   │   │   ├── samplers.py
    │   │   │   ├── tf_dl.py
    │   │   │   ├── tokenization.py
    │   │   │   ├── tokenization_gpt2.py
    │   │   │   └── wordpiece.py
    │   │   ├── detokenizer.py
    │   │   ├── docker
    │   │   │   ├── Dockerfile
    │   │   │   ├── README.md
    │   │   │   └── requirements.txt
    │   │   ├── evaluate_gpt2.py
    │   │   ├── fp16
    │   │   │   ├── __init__.py
    │   │   │   ├── fp16.py
    │   │   │   ├── fp16util.py
    │   │   │   └── loss_scaler.py
    │   │   ├── generate_samples.py
    │   │   ├── gpt2_data_loader.py
    │   │   ├── learning_rates.py
    │   │   ├── model
    │   │   │   ├── __init__.py
    │   │   │   ├── distributed.py
    │   │   │   ├── gpt2_modeling.py
    │   │   │   ├── model.py
    │   │   │   └── modeling.py
    │   │   ├── mpu
    │   │   │   ├── __init__.py
    │   │   │   ├── cross_entropy.py
    │   │   │   ├── data.py
    │   │   │   ├── grads.py
    │   │   │   ├── initialize.py
    │   │   │   ├── layers.py
    │   │   │   ├── mappings.py
    │   │   │   ├── random.py
    │   │   │   ├── tests
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── commons.py
    │   │   │   │   ├── test_cross_entropy.py
    │   │   │   │   ├── test_data.py
    │   │   │   │   ├── test_initialize.py
    │   │   │   │   ├── test_layers.py
    │   │   │   │   └── test_random.py
    │   │   │   ├── transformer.py
    │   │   │   └── utils.py
    │   │   ├── openwebtext
    │   │   │   ├── README.md
    │   │   │   ├── blacklist_urls.py
    │   │   │   ├── cleanup_dataset.py
    │   │   │   ├── find_duplicates.py
    │   │   │   ├── group_duplicates_url.py
    │   │   │   ├── make_gpt2_dataset.py
    │   │   │   ├── make_gpt2_sizes.py
    │   │   │   ├── merge_jsons.py
    │   │   │   ├── remove_group_duplicates.py
    │   │   │   ├── run_make_gpt2_dataset.sh
    │   │   │   └── tokenizer.py
    │   │   ├── pretrain_bert.py
    │   │   ├── pretrain_gpt2.py
    │   │   ├── requirements.txt
    │   │   ├── scripts
    │   │   │   ├── generate_text.sh
    │   │   │   ├── presplit_sentences_json.py
    │   │   │   ├── pretrain_bert.sh
    │   │   │   ├── pretrain_bert_distributed.sh
    │   │   │   ├── pretrain_bert_model_parallel.sh
    │   │   │   ├── pretrain_bert_sentencepiece.sh
    │   │   │   ├── pretrain_bert_tfrecords_distributed.sh
    │   │   │   ├── pretrain_gpt2.sh
    │   │   │   ├── pretrain_gpt2_distributed.sh
    │   │   │   ├── pretrain_gpt2_model_parallel.sh
    │   │   │   ├── run_gpt2_eval.py
    │   │   │   ├── split_gpt2_json.py
    │   │   │   └── split_json.py
    │   │   └── utils.py
    │   ├── megatron_trainer.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── pipeline_parallel_transformer
    │   │   │   ├── __init__.py
    │   │   │   ├── layers.py
    │   │   │   └── model.py
    │   │   ├── roberta
    │   │   │   ├── __init__.py
    │   │   │   └── model.py
    │   │   ├── transformer.py
    │   │   └── transformer_lm.py
    │   └── modules
    │   │   ├── __init__.py
    │   │   ├── multihead_attention.py
    │   │   ├── transformer_layer.py
    │   │   ├── transformer_sentence_encoder.py
    │   │   └── transformer_sentence_encoder_layer.py
    ├── models
    │   ├── __init__.py
    │   ├── bart
    │   │   ├── __init__.py
    │   │   ├── hub_interface.py
    │   │   └── model.py
    │   ├── composite_encoder.py
    │   ├── distributed_fairseq_model.py
    │   ├── fairseq_decoder.py
    │   ├── fairseq_encoder.py
    │   ├── fairseq_incremental_decoder.py
    │   ├── fairseq_model.py
    │   ├── fconv.py
    │   ├── fconv_lm.py
    │   ├── fconv_self_att.py
    │   ├── huggingface
    │   │   ├── __init__.py
    │   │   ├── hf_gpt2.py
    │   │   └── transformers
    │   │   │   ├── .circleci
    │   │   │       ├── config.yml
    │   │   │       └── deploy.sh
    │   │   │   ├── .coveragerc
    │   │   │   ├── .github
    │   │   │       ├── ISSUE_TEMPLATE
    │   │   │       │   ├── ---new-benchmark.md
    │   │   │       │   ├── --new-model-addition.md
    │   │   │       │   ├── bug-report.md
    │   │   │       │   ├── feature-request.md
    │   │   │       │   ├── migration.md
    │   │   │       │   └── question-help.md
    │   │   │       ├── stale.yml
    │   │   │       └── workflows
    │   │   │       │   ├── github-push.yml
    │   │   │       │   ├── self-push.yml
    │   │   │       │   └── self-scheduled.yml
    │   │   │   ├── .gitignore
    │   │   │   ├── CONTRIBUTING.md
    │   │   │   ├── LICENSE
    │   │   │   ├── MANIFEST.in
    │   │   │   ├── Makefile
    │   │   │   ├── README.md
    │   │   │   ├── deploy_multi_version_doc.sh
    │   │   │   ├── docker
    │   │   │       └── Dockerfile
    │   │   │   ├── docs
    │   │   │       ├── Makefile
    │   │   │       ├── README.md
    │   │   │       └── source
    │   │   │       │   ├── _static
    │   │   │       │       ├── css
    │   │   │       │       │   ├── Calibre-Light.ttf
    │   │   │       │       │   ├── Calibre-Medium.otf
    │   │   │       │       │   ├── Calibre-Regular.otf
    │   │   │       │       │   ├── Calibre-Thin.otf
    │   │   │       │       │   ├── code-snippets.css
    │   │   │       │       │   └── huggingface.css
    │   │   │       │       └── js
    │   │   │       │       │   ├── custom.js
    │   │   │       │       │   └── huggingface_logo.svg
    │   │   │       │   ├── benchmarks.md
    │   │   │       │   ├── bertology.rst
    │   │   │       │   ├── conf.py
    │   │   │       │   ├── converting_tensorflow_models.rst
    │   │   │       │   ├── examples.md
    │   │   │       │   ├── favicon.ico
    │   │   │       │   ├── glossary.rst
    │   │   │       │   ├── imgs
    │   │   │       │       ├── transformers_logo_name.png
    │   │   │       │       ├── warmup_constant_schedule.png
    │   │   │       │       ├── warmup_cosine_hard_restarts_schedule.png
    │   │   │       │       ├── warmup_cosine_schedule.png
    │   │   │       │       ├── warmup_cosine_warm_restarts_schedule.png
    │   │   │       │       └── warmup_linear_schedule.png
    │   │   │       │   ├── index.rst
    │   │   │       │   ├── installation.md
    │   │   │       │   ├── main_classes
    │   │   │       │       ├── configuration.rst
    │   │   │       │       ├── model.rst
    │   │   │       │       ├── optimizer_schedules.rst
    │   │   │       │       ├── pipelines.rst
    │   │   │       │       ├── processors.rst
    │   │   │       │       └── tokenizer.rst
    │   │   │       │   ├── migration.md
    │   │   │       │   ├── model_doc
    │   │   │       │       ├── albert.rst
    │   │   │       │       ├── auto.rst
    │   │   │       │       ├── bart.rst
    │   │   │       │       ├── bert.rst
    │   │   │       │       ├── camembert.rst
    │   │   │       │       ├── ctrl.rst
    │   │   │       │       ├── distilbert.rst
    │   │   │       │       ├── flaubert.rst
    │   │   │       │       ├── gpt.rst
    │   │   │       │       ├── gpt2.rst
    │   │   │       │       ├── roberta.rst
    │   │   │       │       ├── transformerxl.rst
    │   │   │       │       ├── xlm.rst
    │   │   │       │       ├── xlmroberta.rst
    │   │   │       │       └── xlnet.rst
    │   │   │       │   ├── model_sharing.md
    │   │   │       │   ├── multilingual.rst
    │   │   │       │   ├── notebooks.rst
    │   │   │       │   ├── pretrained_models.rst
    │   │   │       │   ├── quickstart.md
    │   │   │       │   ├── serialization.rst
    │   │   │       │   ├── torchscript.rst
    │   │   │       │   └── usage.rst
    │   │   │   ├── examples
    │   │   │       ├── README.md
    │   │   │       ├── benchmarks.py
    │   │   │       ├── contrib
    │   │   │       │   ├── README.md
    │   │   │       │   ├── run_camembert.py
    │   │   │       │   ├── run_openai_gpt.py
    │   │   │       │   ├── run_swag.py
    │   │   │       │   └── run_transfo_xl.py
    │   │   │       ├── distillation
    │   │   │       │   ├── README.md
    │   │   │       │   ├── distiller.py
    │   │   │       │   ├── grouped_batch_sampler.py
    │   │   │       │   ├── lm_seqs_dataset.py
    │   │   │       │   ├── requirements.txt
    │   │   │       │   ├── run_squad_w_distillation.py
    │   │   │       │   ├── scripts
    │   │   │       │   │   ├── binarized_data.py
    │   │   │       │   │   ├── extract.py
    │   │   │       │   │   ├── extract_distilbert.py
    │   │   │       │   │   └── token_counts.py
    │   │   │       │   ├── train.py
    │   │   │       │   ├── training_configs
    │   │   │       │   │   ├── distilbert-base-cased.json
    │   │   │       │   │   ├── distilbert-base-multilingual-cased.json
    │   │   │       │   │   ├── distilbert-base-uncased.json
    │   │   │       │   │   ├── distilgpt2.json
    │   │   │       │   │   └── distilroberta-base.json
    │   │   │       │   └── utils.py
    │   │   │       ├── hans
    │   │   │       │   ├── hans_processors.py
    │   │   │       │   ├── test_hans.py
    │   │   │       │   └── utils_hans.py
    │   │   │       ├── mm-imdb
    │   │   │       │   ├── run_mmimdb.py
    │   │   │       │   └── utils_mmimdb.py
    │   │   │       ├── ner
    │   │   │       │   ├── README.md
    │   │   │       │   ├── run.sh
    │   │   │       │   ├── run_ner.py
    │   │   │       │   ├── run_pl.sh
    │   │   │       │   ├── run_pl_ner.py
    │   │   │       │   ├── run_tf_ner.py
    │   │   │       │   ├── transformer_base.py
    │   │   │       │   └── utils_ner.py
    │   │   │       ├── pplm
    │   │   │       │   ├── README.md
    │   │   │       │   ├── imgs
    │   │   │       │   │   ├── headfigure.png
    │   │   │       │   │   └── wooly.png
    │   │   │       │   ├── pplm_classification_head.py
    │   │   │       │   ├── run_pplm.py
    │   │   │       │   └── run_pplm_discrim_train.py
    │   │   │       ├── requirements.txt
    │   │   │       ├── run_bertology.py
    │   │   │       ├── run_generation.py
    │   │   │       ├── run_glue.py
    │   │   │       ├── run_language_modeling.py
    │   │   │       ├── run_multiple_choice.py
    │   │   │       ├── run_squad.py
    │   │   │       ├── run_tf_glue.py
    │   │   │       ├── run_xnli.py
    │   │   │       ├── summarization
    │   │   │       │   ├── README.md
    │   │   │       │   ├── configuration_bertabs.py
    │   │   │       │   ├── convert_bertabs_original_pytorch_checkpoint.py
    │   │   │       │   ├── modeling_bertabs.py
    │   │   │       │   ├── requirements.txt
    │   │   │       │   ├── run_summarization.py
    │   │   │       │   ├── test_utils_summarization.py
    │   │   │       │   └── utils_summarization.py
    │   │   │       ├── test_examples.py
    │   │   │       ├── tests_samples
    │   │   │       │   ├── .gitignore
    │   │   │       │   ├── MRPC
    │   │   │       │   │   ├── dev.tsv
    │   │   │       │   │   └── train.tsv
    │   │   │       │   └── SQUAD
    │   │   │       │   │   ├── dev-v2.0.json
    │   │   │       │   │   └── train-v2.0.json
    │   │   │       └── utils_multiple_choice.py
    │   │   │   ├── hubconf.py
    │   │   │   ├── model_cards
    │   │   │       ├── KB
    │   │   │       │   ├── albert-base-swedish-cased-alpha
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-swedish-cased-ner
    │   │   │       │   │   └── README.md
    │   │   │       │   └── bert-base-swedish-cased
    │   │   │       │   │   └── README.md
    │   │   │       ├── Musixmatch
    │   │   │       │   ├── umberto-commoncrawl-cased-v1
    │   │   │       │   │   └── README.md
    │   │   │       │   └── umberto-wikipedia-uncased-v1
    │   │   │       │   │   └── README.md
    │   │   │       ├── ahotrod
    │   │   │       │   ├── albert_xxlargev1_squad2_512
    │   │   │       │   │   └── README.md
    │   │   │       │   └── xlnet_large_squad2_512
    │   │   │       │   │   └── README.md
    │   │   │       ├── bert-base-german-cased-README.md
    │   │   │       ├── binwang
    │   │   │       │   └── xlnet-base-cased
    │   │   │       │   │   └── README.md
    │   │   │       ├── canwenxu
    │   │   │       │   └── BERT-of-Theseus-MNLI
    │   │   │       │   │   └── README.md
    │   │   │       ├── dbmdz
    │   │   │       │   ├── bert-base-german-cased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-german-europeana-cased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-german-europeana-uncased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-german-uncased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-italian-cased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-italian-uncased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-italian-xxl-cased
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-base-italian-xxl-uncased
    │   │   │       │   │   └── README.md
    │   │   │       │   └── bert-base-turkish-cased
    │   │   │       │   │   └── README.md
    │   │   │       ├── deepset
    │   │   │       │   └── roberta-base-squad2
    │   │   │       │   │   └── README.md
    │   │   │       ├── fmikaelian
    │   │   │       │   ├── camembert-base-fquad
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── camembert-base-squad
    │   │   │       │   │   └── README.md
    │   │   │       │   └── flaubert-base-uncased-squad
    │   │   │       │   │   └── README.md
    │   │   │       ├── henryk
    │   │   │       │   └── bert-base-multilingual-cased-finetuned-dutch-squad2
    │   │   │       │   │   └── README.md
    │   │   │       ├── jplu
    │   │   │       │   ├── tf-camembert-base
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── tf-xlm-roberta-base
    │   │   │       │   │   └── README.md
    │   │   │       │   └── tf-xlm-roberta-large
    │   │   │       │   │   └── README.md
    │   │   │       ├── julien-c
    │   │   │       │   ├── EsperBERTo-small-pos
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── EsperBERTo-small
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-xsmall-dummy
    │   │   │       │   │   └── README.md
    │   │   │       │   └── dummy-unknown
    │   │   │       │   │   └── README.md
    │   │   │       ├── lysandre
    │   │   │       │   ├── arxiv-nlp
    │   │   │       │   │   └── README.md
    │   │   │       │   └── arxiv
    │   │   │       │   │   └── README.md
    │   │   │       ├── mrm8488
    │   │   │       │   ├── bert-base-spanish-wwm-cased-finetuned-spa-squad2-es
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-spanish-cased-finetuned-ner
    │   │   │       │   │   └── README.md
    │   │   │       │   ├── bert-spanish-cased-finetuned-pos
    │   │   │       │   │   └── README.md
    │   │   │       │   └── distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es
    │   │   │       │   │   └── README.md
    │   │   │       ├── nlpaueb
    │   │   │       │   └── bert-base-greek-uncased-v1
    │   │   │       │   │   └── README.md
    │   │   │       ├── nlptown
    │   │   │       │   └── bert-base-multilingual-uncased-sentiment
    │   │   │       │   │   └── README.md
    │   │   │       └── severinsimmler
    │   │   │       │   └── literary-german-bert
    │   │   │       │       ├── README.md
    │   │   │       │       ├── kfold.png
    │   │   │       │       └── prosa-jahre.png
    │   │   │   ├── notebooks
    │   │   │       ├── Comparing-PT-and-TF-models.ipynb
    │   │   │       ├── Comparing-TF-and-PT-models-MLM-NSP.ipynb
    │   │   │       ├── Comparing-TF-and-PT-models-SQuAD.ipynb
    │   │   │       └── Comparing-TF-and-PT-models.ipynb
    │   │   │   ├── setup.cfg
    │   │   │   ├── setup.py
    │   │   │   ├── src
    │   │   │       └── transformers
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── activations.py
    │   │   │       │   ├── commands
    │   │   │       │       ├── __init__.py
    │   │   │       │       ├── convert.py
    │   │   │       │       ├── download.py
    │   │   │       │       ├── env.py
    │   │   │       │       ├── run.py
    │   │   │       │       ├── serving.py
    │   │   │       │       ├── train.py
    │   │   │       │       └── user.py
    │   │   │       │   ├── configuration_albert.py
    │   │   │       │   ├── configuration_auto.py
    │   │   │       │   ├── configuration_bart.py
    │   │   │       │   ├── configuration_bert.py
    │   │   │       │   ├── configuration_camembert.py
    │   │   │       │   ├── configuration_ctrl.py
    │   │   │       │   ├── configuration_distilbert.py
    │   │   │       │   ├── configuration_flaubert.py
    │   │   │       │   ├── configuration_gpt2.py
    │   │   │       │   ├── configuration_mmbt.py
    │   │   │       │   ├── configuration_openai.py
    │   │   │       │   ├── configuration_roberta.py
    │   │   │       │   ├── configuration_t5.py
    │   │   │       │   ├── configuration_transfo_xl.py
    │   │   │       │   ├── configuration_utils.py
    │   │   │       │   ├── configuration_xlm.py
    │   │   │       │   ├── configuration_xlm_roberta.py
    │   │   │       │   ├── configuration_xlnet.py
    │   │   │       │   ├── convert_albert_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_bart_original_pytorch_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_bert_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_bert_pytorch_checkpoint_to_original_tf.py
    │   │   │       │   ├── convert_gpt2_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_openai_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_pytorch_checkpoint_to_tf2.py
    │   │   │       │   ├── convert_roberta_original_pytorch_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_t5_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_xlm_original_pytorch_checkpoint_to_pytorch.py
    │   │   │       │   ├── convert_xlnet_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── data
    │   │   │       │       ├── __init__.py
    │   │   │       │       ├── metrics
    │   │   │       │       │   ├── __init__.py
    │   │   │       │       │   └── squad_metrics.py
    │   │   │       │       └── processors
    │   │   │       │       │   ├── __init__.py
    │   │   │       │       │   ├── glue.py
    │   │   │       │       │   ├── squad.py
    │   │   │       │       │   ├── utils.py
    │   │   │       │       │   └── xnli.py
    │   │   │       │   ├── file_utils.py
    │   │   │       │   ├── hf_api.py
    │   │   │       │   ├── modelcard.py
    │   │   │       │   ├── modeling_albert.py
    │   │   │       │   ├── modeling_auto.py
    │   │   │       │   ├── modeling_bart.py
    │   │   │       │   ├── modeling_bert.py
    │   │   │       │   ├── modeling_camembert.py
    │   │   │       │   ├── modeling_ctrl.py
    │   │   │       │   ├── modeling_distilbert.py
    │   │   │       │   ├── modeling_encoder_decoder.py
    │   │   │       │   ├── modeling_flaubert.py
    │   │   │       │   ├── modeling_gpt2.py
    │   │   │       │   ├── modeling_mmbt.py
    │   │   │       │   ├── modeling_openai.py
    │   │   │       │   ├── modeling_roberta.py
    │   │   │       │   ├── modeling_t5.py
    │   │   │       │   ├── modeling_tf_albert.py
    │   │   │       │   ├── modeling_tf_auto.py
    │   │   │       │   ├── modeling_tf_bert.py
    │   │   │       │   ├── modeling_tf_camembert.py
    │   │   │       │   ├── modeling_tf_ctrl.py
    │   │   │       │   ├── modeling_tf_distilbert.py
    │   │   │       │   ├── modeling_tf_gpt2.py
    │   │   │       │   ├── modeling_tf_openai.py
    │   │   │       │   ├── modeling_tf_pytorch_utils.py
    │   │   │       │   ├── modeling_tf_roberta.py
    │   │   │       │   ├── modeling_tf_t5.py
    │   │   │       │   ├── modeling_tf_transfo_xl.py
    │   │   │       │   ├── modeling_tf_transfo_xl_utilities.py
    │   │   │       │   ├── modeling_tf_utils.py
    │   │   │       │   ├── modeling_tf_xlm.py
    │   │   │       │   ├── modeling_tf_xlm_roberta.py
    │   │   │       │   ├── modeling_tf_xlnet.py
    │   │   │       │   ├── modeling_transfo_xl.py
    │   │   │       │   ├── modeling_transfo_xl_utilities.py
    │   │   │       │   ├── modeling_utils.py
    │   │   │       │   ├── modeling_xlm.py
    │   │   │       │   ├── modeling_xlm_roberta.py
    │   │   │       │   ├── modeling_xlnet.py
    │   │   │       │   ├── optimization.py
    │   │   │       │   ├── optimization_tf.py
    │   │   │       │   ├── pipelines.py
    │   │   │       │   ├── tokenization_albert.py
    │   │   │       │   ├── tokenization_auto.py
    │   │   │       │   ├── tokenization_bart.py
    │   │   │       │   ├── tokenization_bert.py
    │   │   │       │   ├── tokenization_bert_japanese.py
    │   │   │       │   ├── tokenization_camembert.py
    │   │   │       │   ├── tokenization_ctrl.py
    │   │   │       │   ├── tokenization_distilbert.py
    │   │   │       │   ├── tokenization_flaubert.py
    │   │   │       │   ├── tokenization_gpt2.py
    │   │   │       │   ├── tokenization_openai.py
    │   │   │       │   ├── tokenization_roberta.py
    │   │   │       │   ├── tokenization_t5.py
    │   │   │       │   ├── tokenization_transfo_xl.py
    │   │   │       │   ├── tokenization_utils.py
    │   │   │       │   ├── tokenization_xlm.py
    │   │   │       │   ├── tokenization_xlm_roberta.py
    │   │   │       │   ├── tokenization_xlnet.py
    │   │   │       │   └── utils_encoder_decoder.py
    │   │   │   ├── templates
    │   │   │       ├── adding_a_new_example_script
    │   │   │       │   ├── README.md
    │   │   │       │   ├── run_xxx.py
    │   │   │       │   └── utils_xxx.py
    │   │   │       └── adding_a_new_model
    │   │   │       │   ├── README.md
    │   │   │       │   ├── configuration_xxx.py
    │   │   │       │   ├── convert_xxx_original_tf_checkpoint_to_pytorch.py
    │   │   │       │   ├── modeling_tf_xxx.py
    │   │   │       │   ├── modeling_xxx.py
    │   │   │       │   ├── tests
    │   │   │       │       ├── test_modeling_tf_xxx.py
    │   │   │       │       ├── test_modeling_xxx.py
    │   │   │       │       └── test_tokenization_xxx.py
    │   │   │       │   └── tokenization_xxx.py
    │   │   │   ├── tests
    │   │   │       ├── __init__.py
    │   │   │       ├── fixtures
    │   │   │       │   ├── dummy-config.json
    │   │   │       │   ├── empty.txt
    │   │   │       │   ├── input.txt
    │   │   │       │   ├── sample_text.txt
    │   │   │       │   ├── spiece.model
    │   │   │       │   └── test_sentencepiece.model
    │   │   │       ├── test_activations.py
    │   │   │       ├── test_configuration_auto.py
    │   │   │       ├── test_configuration_common.py
    │   │   │       ├── test_doc_samples.py
    │   │   │       ├── test_hf_api.py
    │   │   │       ├── test_model_card.py
    │   │   │       ├── test_modeling_albert.py
    │   │   │       ├── test_modeling_auto.py
    │   │   │       ├── test_modeling_bart.py
    │   │   │       ├── test_modeling_bert.py
    │   │   │       ├── test_modeling_common.py
    │   │   │       ├── test_modeling_ctrl.py
    │   │   │       ├── test_modeling_distilbert.py
    │   │   │       ├── test_modeling_flaubert.py
    │   │   │       ├── test_modeling_gpt2.py
    │   │   │       ├── test_modeling_openai.py
    │   │   │       ├── test_modeling_roberta.py
    │   │   │       ├── test_modeling_t5.py
    │   │   │       ├── test_modeling_tf_albert.py
    │   │   │       ├── test_modeling_tf_auto.py
    │   │   │       ├── test_modeling_tf_bert.py
    │   │   │       ├── test_modeling_tf_common.py
    │   │   │       ├── test_modeling_tf_ctrl.py
    │   │   │       ├── test_modeling_tf_distilbert.py
    │   │   │       ├── test_modeling_tf_gpt2.py
    │   │   │       ├── test_modeling_tf_openai_gpt.py
    │   │   │       ├── test_modeling_tf_roberta.py
    │   │   │       ├── test_modeling_tf_t5.py
    │   │   │       ├── test_modeling_tf_transfo_xl.py
    │   │   │       ├── test_modeling_tf_xlm.py
    │   │   │       ├── test_modeling_tf_xlnet.py
    │   │   │       ├── test_modeling_transfo_xl.py
    │   │   │       ├── test_modeling_xlm.py
    │   │   │       ├── test_modeling_xlm_roberta.py
    │   │   │       ├── test_modeling_xlnet.py
    │   │   │       ├── test_optimization.py
    │   │   │       ├── test_optimization_tf.py
    │   │   │       ├── test_pipelines.py
    │   │   │       ├── test_tokenization_albert.py
    │   │   │       ├── test_tokenization_auto.py
    │   │   │       ├── test_tokenization_bert.py
    │   │   │       ├── test_tokenization_bert_japanese.py
    │   │   │       ├── test_tokenization_common.py
    │   │   │       ├── test_tokenization_ctrl.py
    │   │   │       ├── test_tokenization_distilbert.py
    │   │   │       ├── test_tokenization_fast.py
    │   │   │       ├── test_tokenization_gpt2.py
    │   │   │       ├── test_tokenization_openai.py
    │   │   │       ├── test_tokenization_roberta.py
    │   │   │       ├── test_tokenization_t5.py
    │   │   │       ├── test_tokenization_transfo_xl.py
    │   │   │       ├── test_tokenization_utils.py
    │   │   │       ├── test_tokenization_xlm.py
    │   │   │       ├── test_tokenization_xlm_roberta.py
    │   │   │       ├── test_tokenization_xlnet.py
    │   │   │       └── utils.py
    │   │   │   ├── transformers-cli
    │   │   │   ├── utils
    │   │   │       ├── download_glue_data.py
    │   │   │       └── link_tester.py
    │   │   │   └── valohai.yaml
    │   ├── lightconv.py
    │   ├── lightconv_lm.py
    │   ├── lstm.py
    │   ├── lstm_lm.py
    │   ├── masked_lm.py
    │   ├── model_utils.py
    │   ├── multilingual_transformer.py
    │   ├── nat
    │   │   ├── __init__.py
    │   │   ├── cmlm_transformer.py
    │   │   ├── fairseq_nat_model.py
    │   │   ├── insertion_transformer.py
    │   │   ├── iterative_nonautoregressive_transformer.py
    │   │   ├── levenshtein_transformer.py
    │   │   ├── levenshtein_utils.py
    │   │   ├── nat_crf_transformer.py
    │   │   ├── nonautoregressive_ensembles.py
    │   │   └── nonautoregressive_transformer.py
    │   ├── roberta
    │   │   ├── __init__.py
    │   │   ├── alignment_utils.py
    │   │   ├── hub_interface.py
    │   │   ├── model.py
    │   │   ├── model_camembert.py
    │   │   └── model_xlmr.py
    │   ├── transformer.py
    │   ├── transformer_align.py
    │   ├── transformer_from_pretrained_xlm.py
    │   ├── transformer_lm.py
    │   └── wav2vec
    │   │   ├── __init__.py
    │   │   ├── wav2vec.py
    │   │   ├── wav2vec2.py
    │   │   └── wav2vec2_asr.py
    ├── modules
    │   ├── __init__.py
    │   ├── adaptive_input.py
    │   ├── adaptive_softmax.py
    │   ├── beamable_mm.py
    │   ├── character_token_embedder.py
    │   ├── conv_tbc.py
    │   ├── cross_entropy.py
    │   ├── cuda_utils.cu
    │   ├── downsampled_multihead_attention.py
    │   ├── dynamic_convolution.py
    │   ├── dynamic_crf_layer.py
    │   ├── dynamicconv_layer
    │   │   ├── __init__.py
    │   │   ├── cuda_function_gen.py
    │   │   ├── dynamicconv_cuda.cpp
    │   │   ├── dynamicconv_cuda.cuh
    │   │   ├── dynamicconv_cuda_kernel.cu
    │   │   ├── dynamicconv_layer.py
    │   │   ├── dynamiconv_cpu.cpp
    │   │   └── setup.py
    │   ├── fairseq_dropout.py
    │   ├── fp32_group_norm.py
    │   ├── gelu.py
    │   ├── grad_multiply.py
    │   ├── gumbel_vector_quantizer.py
    │   ├── kmeans_vector_quantizer.py
    │   ├── layer_drop.py
    │   ├── layer_norm.py
    │   ├── learned_positional_embedding.py
    │   ├── lightconv_layer
    │   │   ├── __init__.py
    │   │   ├── cuda_function_gen.py
    │   │   ├── lightconv_cuda.cpp
    │   │   ├── lightconv_cuda.cuh
    │   │   ├── lightconv_cuda_kernel.cu
    │   │   ├── lightconv_layer.py
    │   │   └── setup.py
    │   ├── lightweight_convolution.py
    │   ├── linearized_convolution.py
    │   ├── multihead_attention.py
    │   ├── positional_embedding.py
    │   ├── quant_noise.py
    │   ├── quantization
    │   │   ├── __init__.py
    │   │   ├── pq
    │   │   │   ├── __init__.py
    │   │   │   ├── em.py
    │   │   │   ├── modules
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── qconv.py
    │   │   │   │   ├── qemb.py
    │   │   │   │   └── qlinear.py
    │   │   │   ├── pq.py
    │   │   │   └── utils.py
    │   │   ├── quantization_options.py
    │   │   └── scalar
    │   │   │   ├── __init__.py
    │   │   │   ├── modules
    │   │   │       ├── __init__.py
    │   │   │       ├── qact.py
    │   │   │       ├── qconv.py
    │   │   │       ├── qemb.py
    │   │   │       └── qlinear.py
    │   │   │   ├── ops.py
    │   │   │   └── utils.py
    │   ├── same_pad.py
    │   ├── scalar_bias.py
    │   ├── sinusoidal_positional_embedding.py
    │   ├── sparse_multihead_attention.py
    │   ├── sparse_transformer_sentence_encoder.py
    │   ├── sparse_transformer_sentence_encoder_layer.py
    │   ├── transformer_layer.py
    │   ├── transformer_sentence_encoder.py
    │   ├── transformer_sentence_encoder_layer.py
    │   ├── transpose_last.py
    │   ├── unfold.py
    │   └── vggblock.py
    ├── nan_detector.py
    ├── optim
    │   ├── __init__.py
    │   ├── adadelta.py
    │   ├── adafactor.py
    │   ├── adagrad.py
    │   ├── adam.py
    │   ├── adamax.py
    │   ├── bmuf.py
    │   ├── dynamic_loss_scaler.py
    │   ├── fairseq_optimizer.py
    │   ├── fp16_optimizer.py
    │   ├── fused_adam.py
    │   ├── fused_lamb.py
    │   ├── lr_scheduler
    │   │   ├── __init__.py
    │   │   ├── cosine_lr_scheduler.py
    │   │   ├── fairseq_lr_scheduler.py
    │   │   ├── fixed_schedule.py
    │   │   ├── inverse_square_root_schedule.py
    │   │   ├── polynomial_decay_schedule.py
    │   │   ├── reduce_lr_on_plateau.py
    │   │   ├── tri_stage_lr_scheduler.py
    │   │   └── triangular_lr_scheduler.py
    │   ├── nag.py
    │   ├── sgd.py
    │   └── shard.py
    ├── options.py
    ├── pdb.py
    ├── quantization_utils.py
    ├── registry.py
    ├── scoring
    │   ├── __init__.py
    │   ├── bleu.py
    │   └── wer.py
    ├── search.py
    ├── sequence_generator.py
    ├── sequence_scorer.py
    ├── tasks
    │   ├── __init__.py
    │   ├── audio_pretraining.py
    │   ├── cross_lingual_lm.py
    │   ├── denoising.py
    │   ├── fairseq_task.py
    │   ├── language_modeling.py
    │   ├── legacy_masked_lm.py
    │   ├── masked_lm.py
    │   ├── multilingual_denoising.py
    │   ├── multilingual_masked_lm.py
    │   ├── multilingual_translation.py
    │   ├── semisupervised_translation.py
    │   ├── sentence_prediction.py
    │   ├── sentence_ranking.py
    │   ├── translation.py
    │   ├── translation_from_pretrained_bart.py
    │   ├── translation_from_pretrained_xlm.py
    │   ├── translation_lev.py
    │   └── translation_multi_simple_epoch.py
    ├── token_generation_constraints.py
    ├── tokenizer.py
    ├── trainer.py
    └── utils.py
├── fairseq_cli
    ├── __init__.py
    ├── eval_lm.py
    ├── eval_tlm.py
    ├── generate.py
    ├── interactive.py
    ├── preprocess.py
    ├── score.py
    ├── train.py
    └── validate.py
├── hubconf.py
├── scripts
    ├── __init__.py
    ├── average_checkpoints.py
    ├── build_sym_alignment.py
    ├── compare_namespaces.py
    ├── compound_split_bleu.sh
    ├── constraints
    │   ├── extract.py
    │   └── validate.py
    ├── convert_dictionary.lua
    ├── convert_model.lua
    ├── count_docs.py
    ├── read_binarized.py
    ├── rm_pt.py
    ├── sacrebleu.sh
    ├── shard_docs.py
    ├── split_train_valid_docs.py
    ├── spm_decode.py
    ├── spm_encode.py
    └── spm_train.py
├── setup.py
├── tests
    ├── __init__.py
    ├── gpu
    │   ├── __init__.py
    │   ├── test_binaries_gpu.py
    │   └── transformer_quantization_config.yaml
    ├── speech_recognition
    │   ├── __init__.py
    │   ├── asr_test_base.py
    │   ├── test_collaters.py
    │   ├── test_cross_entropy.py
    │   ├── test_data_utils.py
    │   └── test_vggtransformer.py
    ├── test_average_checkpoints.py
    ├── test_backtranslation_dataset.py
    ├── test_binaries.py
    ├── test_bmuf.py
    ├── test_character_token_embedder.py
    ├── test_concat_dataset.py
    ├── test_constraints.py
    ├── test_convtbc.py
    ├── test_dictionary.py
    ├── test_export.py
    ├── test_file_io.py
    ├── test_fp16_optimizer.py
    ├── test_inference_dropout.py
    ├── test_iterators.py
    ├── test_label_smoothing.py
    ├── test_lstm_jitable.py
    ├── test_memory_efficient_fp16.py
    ├── test_metrics.py
    ├── test_multi_corpus_sampled_dataset.py
    ├── test_multihead_attention.py
    ├── test_noising.py
    ├── test_reproducibility.py
    ├── test_resampling_dataset.py
    ├── test_sequence_generator.py
    ├── test_sequence_scorer.py
    ├── test_sparse_multihead_attention.py
    ├── test_token_block_dataset.py
    ├── test_train.py
    ├── test_utils.py
    └── utils.py
└── train.py


/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## 👉 [Please follow one of these issue templates](https://github.com/pytorch/fairseq/issues/new/choose) 👈
2 | 
3 | Note: to keep the backlog clean and actionable, issues may be immediately closed if they do not follow one of the above issue templates.
4 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 🐛 Bug Report
 3 | about: Submit a bug report to help us improve
 4 | labels: 'bug, needs triage'
 5 | ---
 6 | 
 7 | ## 🐛 Bug
 8 | 
 9 | <!-- A clear and concise description of what the bug is. -->
10 | 
11 | ### To Reproduce
12 | 
13 | Steps to reproduce the behavior (**always include the command you ran**):
14 | 
15 | 1. Run cmd '....'
16 | 2. See error
17 | 
18 | <!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
19 | 
20 | 
21 | #### Code sample
 22 | <!-- Ideally attach a minimal code sample to reproduce the described issue.
23 | Minimal means having the shortest code but still preserving the bug. -->
24 | 
25 | ### Expected behavior
26 | 
27 | <!-- A clear and concise description of what you expected to happen. -->
28 | 
29 | ### Environment
30 | 
31 |  - fairseq Version (e.g., 1.0 or master):
32 |  - PyTorch Version (e.g., 1.0)
33 |  - OS (e.g., Linux):
34 |  - How you installed fairseq (`pip`, source):
35 |  - Build command you used (if compiling from source):
36 |  - Python version:
37 |  - CUDA/cuDNN version:
38 |  - GPU models and configuration:
39 |  - Any other relevant information:
40 | 
41 | ### Additional context
42 | 
43 | <!-- Add any other context about the problem here. -->
44 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 📚 Documentation/Typos
 3 | about: Report an issue related to documentation or a typo
 4 | labels: 'documentation, needs triage'
 5 | ---
 6 | 
 7 | ## 📚 Documentation
 8 | 
 9 | For typos and doc fixes, please go ahead and:
10 | 
11 | 1. Create an issue.
12 | 2. Fix the typo.
13 | 3. Submit a PR.
14 | 
15 | Thanks!
16 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 🚀 Feature Request
 3 | about: Submit a proposal/request for a new feature
 4 | labels: 'enhancement, help wanted, needs triage'
 5 | ---
 6 | 
 7 | ## 🚀 Feature Request
 8 | <!-- A clear and concise description of the feature proposal -->
 9 | 
10 | ### Motivation
11 | 
12 | <!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
13 | 
14 | ### Pitch
15 | 
16 | <!-- A clear and concise description of what you want to happen. -->
17 | 
18 | ### Alternatives
19 | 
20 | <!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
21 | 
22 | ### Additional context
23 | 
24 | <!-- Add any other context or screenshots about the feature request here. -->
25 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/how-to-question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: ❓ Questions/Help
 3 | about: If you have questions, please first search existing issues and docs
 4 | labels: 'question, needs triage'
 5 | ---
 6 | 
 7 | ## ❓ Questions and Help
 8 | 
 9 | ### Before asking:   
10 | 1. search the issues.   
11 | 2. search the docs.    
12 | 
13 | <!-- If you still can't find what you need: -->
14 | 
15 | #### What is your question?
16 | 
17 | #### Code
18 | 
19 | <!-- Please paste a code snippet if your question requires it! -->   
20 | 
21 | #### What have you tried?
22 | 
23 | #### What's your environment?
24 | 
25 |  - fairseq Version (e.g., 1.0 or master):
26 |  - PyTorch Version (e.g., 1.0)
27 |  - OS (e.g., Linux):
28 |  - How you installed fairseq (`pip`, source):
29 |  - Build command you used (if compiling from source):
30 |  - Python version:
31 |  - CUDA/cuDNN version:
32 |  - GPU models and configuration:
33 |  - Any other relevant information:
34 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # Before submitting
 2 | 
 3 | - [ ] Was this discussed/approved via a GitHub issue? (not needed for typo fixes or doc improvements)
 4 | - [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/master/CONTRIBUTING.md)?
 5 | - [ ] Did you make sure to update the docs?   
 6 | - [ ] Did you write any new necessary tests?  
 7 | 
 8 | ## What does this PR do?
 9 | Fixes # (issue).
10 | 
11 | ## PR review    
12 | Anyone in the community is free to review the PR once the tests have passed.     
13 | If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.
14 | 
15 | ## Did you have fun?
16 | Make sure you had fun coding 🙃
17 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push to master or any pull request
 5 |   push:
 6 |     branches:
 7 |       - master
 8 |   pull_request:
 9 | 
10 | jobs:
11 |   build:
12 | 
13 |     strategy:
14 |       max-parallel: 4
15 |       matrix:
16 |         platform: [ubuntu-latest, macos-latest]
17 |         python-version: [3.6, 3.7]
18 | 
19 |     runs-on: ${{ matrix.platform }}
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v1
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v1
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Conditionally install pytorch
28 |       if: matrix.platform == 'windows-latest'
29 |       run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
30 |     - name: Install locally
31 |       run: |
32 |         python -m pip install --upgrade pip
33 |         python setup.py build_ext --inplace
34 |         python -m pip install --editable .
35 |     - name: Lint with flake8
36 |       run: |
37 |         pip install flake8
38 |         # stop the build if there are Python syntax errors or undefined names
39 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
40 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
41 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
42 |     - name: Run tests
43 |       run: |
44 |           python setup.py test
45 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "fairseq/models/huggingface/transformers"]
2 |     path = fairseq/models/huggingface/transformers
3 |     url = https://github.com/myleott/transformers.git
4 |     branch = fairseq
5 | [submodule "fairseq/model_parallel/megatron"]
6 |     path = fairseq/model_parallel/megatron
7 |     url = https://github.com/ngoyal2707/Megatron-LM
8 |     branch = fairseq
9 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq)
 2 | We want to make contributing to this project as easy and transparent as
 3 | possible.
 4 | 
 5 | ## Pull Requests
 6 | We actively welcome your pull requests.
 7 | 
 8 | 1. Fork the repo and create your branch from `master`.
 9 | 2. If you've added code that should be tested, add tests.
10 | 3. If you've changed APIs, update the documentation.
11 | 4. Ensure the test suite passes.
12 | 5. Make sure your code lints.
13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
14 | 
15 | ## Contributor License Agreement ("CLA")
16 | In order to accept your pull request, we need you to submit a CLA. You only need
17 | to do this once to work on any of Facebook's open source projects.
18 | 
19 | Complete your CLA here: <https://code.facebook.com/cla>
20 | 
21 | ## Issues
22 | We use GitHub issues to track public bugs. Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 | 
25 | ## License
26 | By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq),
27 | you agree that your contributions will be licensed under the LICENSE file in
28 | the root directory of this source tree.
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Facebook, Inc. and its affiliates.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - params: training_params
3 |   - task: language_modeling
4 |   - model: transformer_lm
5 |   - criterion: cross_entropy
6 |   - optimizer: adam
7 |   - lr_scheduler: inverse_sqrt
8 | 


--------------------------------------------------------------------------------
/config/config_eval_lm.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - params: eval_lm_params
3 |   - task: language_modeling
4 |   - model: transformer_lm
5 |   - criterion: cross_entropy
6 |   - optimizer: adam
7 |   - lr_scheduler: inverse_sqrt
8 | 


--------------------------------------------------------------------------------
/config/criterion/adaptive_loss.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | sentence_avg: ${params.optimization.sentence_avg}
3 | ddp_backend: ${params.distributed_training.ddp_backend}
4 | 


--------------------------------------------------------------------------------
/config/criterion/cross_entropy.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | sentence_avg: ${params.optimization.sentence_avg}
3 | ddp_backend: ${params.distributed_training.ddp_backend}
4 | 


--------------------------------------------------------------------------------
/config/lr_scheduler/cosine.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | warmup_updates: 0
3 | warmup_init_lr: -1
4 | max_lr: 1.0
5 | t_mult: 1.0
6 | lr_period_updates: -1
7 | lr_shrink: 0.1
8 | 


--------------------------------------------------------------------------------
/config/lr_scheduler/inverse_sqrt.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | warmup_updates: 4000
3 | warmup_init_lr: -1
4 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.0
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 512
 8 | decoder_output_dim: 512
 9 | decoder_input_dim: 512
10 | decoder_ffn_embed_dim: 2048
11 | decoder_layers: 6
12 | decoder_attention_heads: 8
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_baevski_gbw.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 512
 8 | decoder_output_dim: 512
 9 | decoder_input_dim: 512
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 12
12 | decoder_attention_heads: 16
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: true
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_baevski_wiki103.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.3
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.1
 6 | relu_dropout: 0.1
 7 | decoder_embed_dim: 1024
 8 | decoder_output_dim: 1024
 9 | decoder_input_dim: 1024
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 16
12 | decoder_attention_heads: 8
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: true
15 | adaptive_softmax_cutoff: "20000,60000"
16 | adaptive_softmax_dropout: 0.2
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: true
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: "20000,60000"
27 | tie_adaptive_weights: true
28 | tie_adaptive_proj: true
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_big.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.0
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 1024
 8 | decoder_output_dim: 1024
 9 | decoder_input_dim: 1024
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 12
12 | decoder_attention_heads: 16
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_gbw.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 512
 8 | decoder_output_dim: 512
 9 | decoder_input_dim: 512
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 12
12 | decoder_attention_heads: 16
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: true
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_gpt.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "gelu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 768
 8 | decoder_output_dim: 768
 9 | decoder_input_dim: 768
10 | decoder_ffn_embed_dim: 3072
11 | decoder_layers: 12
12 | decoder_attention_heads: 12
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_gpt2_big.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "gelu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 1600
 8 | decoder_output_dim: 1600
 9 | decoder_input_dim: 1600
10 | decoder_ffn_embed_dim: 6400
11 | decoder_layers: 48
12 | decoder_attention_heads: 25
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_gpt2_medium.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "gelu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 1280
 8 | decoder_output_dim: 1280
 9 | decoder_input_dim: 1280
10 | decoder_ffn_embed_dim: 5120
11 | decoder_layers: 36
12 | decoder_attention_heads: 20
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_gpt2_small.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "gelu"
 3 | dropout: 0.1
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.0
 6 | relu_dropout: 0.0
 7 | decoder_embed_dim: 1024
 8 | decoder_output_dim: 1024
 9 | decoder_input_dim: 1024
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 24
12 | decoder_attention_heads: 16
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: false
15 | adaptive_softmax_cutoff: null
16 | adaptive_softmax_dropout: 0
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: false
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: null
27 | tie_adaptive_weights: false
28 | tie_adaptive_proj: false
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/model/transformer_lm_wiki103.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | activation_fn: "relu"
 3 | dropout: 0.3
 4 | attention_dropout: 0.1
 5 | activation_dropout: 0.1
 6 | relu_dropout: 0.1
 7 | decoder_embed_dim: 1024
 8 | decoder_output_dim: 1024
 9 | decoder_input_dim: 1024
10 | decoder_ffn_embed_dim: 4096
11 | decoder_layers: 16
12 | decoder_attention_heads: 8
13 | decoder_normalize_before: true
14 | no_decoder_final_norm: true
15 | adaptive_softmax_cutoff: "20000,60000"
16 | adaptive_softmax_dropout: 0.2
17 | adaptive_softmax_factor: 4
18 | no_token_positional_embeddings: false
19 | share_decoder_input_output_embed: false
20 | character_embeddings: false
21 | character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
22 | character_embedding_dim: 4
23 | char_embedder_highway_layers: 2
24 | adaptive_input: true
25 | adaptive_input_factor: 4
26 | adaptive_input_cutoff: "20000,60000"
27 | tie_adaptive_weights: true
28 | tie_adaptive_proj: true
29 | decoder_learned_pos: false
30 | decoder_layerdrop: 0
31 | decoder_layers_to_keep: null
32 | layernorm_embedding: false
33 | no_scale_embedding: false
34 | quant_noise_pq: 0
35 | quant_noise_pq_block_size: 8
36 | quant_noise_scalar: 0
37 | 


--------------------------------------------------------------------------------
/config/optimizer/adam.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | adam_betas: "(0.9, 0.999)"
3 | adam_eps: 1.0e-8
4 | weight_decay: 0
5 | use_old_adam: false
6 | 


--------------------------------------------------------------------------------
/config/optimizer/nag.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | momentum: 0.99
3 | weight_decay: 0.0
4 | 


--------------------------------------------------------------------------------
/config/task/language_modeling.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | data: ???
 3 | sample_break_mode: "none"
 4 | tokens_per_sample: 1024
 5 | output_dictionary_size: -1
 6 | self_target: false
 7 | future_target: false
 8 | past_target: false
 9 | add_bos_token: false
10 | max_target_positions: null
11 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = python -msphinx
 7 | SPHINXPROJ    = fairseq
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/_static/theme_overrides.css:
--------------------------------------------------------------------------------
 1 | .wy-table-responsive table td kbd {
 2 |     white-space: nowrap;
 3 | }
 4 | .wy-table-responsive table td {
 5 |     white-space: normal !important;
 6 | }
 7 | .wy-table-responsive {
 8 |     overflow: visible !important;
 9 | }
10 | 


--------------------------------------------------------------------------------
/docs/criterions.rst:
--------------------------------------------------------------------------------
 1 | .. role:: hidden
 2 |     :class: hidden-section
 3 | 
 4 | .. _Criterions:
 5 | 
 6 | Criterions
 7 | ==========
 8 | 
 9 | Criterions compute the loss function given the model and batch, roughly::
10 | 
11 |   loss = criterion(model, batch)
12 | 
13 | .. automodule:: fairseq.criterions
14 |     :members:
15 | 
16 | .. autoclass:: fairseq.criterions.FairseqCriterion
17 |     :members:
18 |     :undoc-members:
19 | 
20 | .. autoclass:: fairseq.criterions.adaptive_loss.AdaptiveLoss
21 |     :members:
22 |     :undoc-members:
23 | .. autoclass:: fairseq.criterions.composite_loss.CompositeLoss
24 |     :members:
25 |     :undoc-members:
26 | .. autoclass:: fairseq.criterions.cross_entropy.CrossEntropyCriterion
27 |     :members:
28 |     :undoc-members:
29 | .. autoclass:: fairseq.criterions.label_smoothed_cross_entropy.LabelSmoothedCrossEntropyCriterion
30 |     :members:
31 |     :undoc-members:
32 | 


--------------------------------------------------------------------------------
/docs/data.rst:
--------------------------------------------------------------------------------
 1 | .. role:: hidden
 2 |     :class: hidden-section
 3 | 
 4 | .. module:: fairseq.data
 5 | 
 6 | Data Loading and Utilities
 7 | ==========================
 8 | 
 9 | .. _datasets:
10 | 
11 | Datasets
12 | --------
13 | 
14 | **Datasets** define the data format and provide helpers for creating
15 | mini-batches.
16 | 
17 | .. autoclass:: fairseq.data.FairseqDataset
18 |     :members:
19 | .. autoclass:: fairseq.data.LanguagePairDataset
20 |     :members:
21 | .. autoclass:: fairseq.data.MonolingualDataset
22 |     :members:
23 | 
24 | **Helper Datasets**
25 | 
26 | These datasets wrap other :class:`fairseq.data.FairseqDataset` instances and
27 | provide additional functionality:
28 | 
29 | .. autoclass:: fairseq.data.BacktranslationDataset
30 |     :members:
31 | .. autoclass:: fairseq.data.ConcatDataset
32 |     :members:
33 | .. autoclass:: fairseq.data.ResamplingDataset
34 |     :members:
35 | .. autoclass:: fairseq.data.RoundRobinZipDatasets
36 |     :members:
37 | .. autoclass:: fairseq.data.TransformEosDataset
38 |     :members:
39 | 
40 | 
41 | Dictionary
42 | ----------
43 | 
44 | .. autoclass:: fairseq.data.Dictionary
45 |     :members:
46 | 
47 | 
48 | Iterators
49 | ---------
50 | 
51 | .. autoclass:: fairseq.data.CountingIterator
52 |     :members:
53 | .. autoclass:: fairseq.data.EpochBatchIterator
54 |     :members:
55 | .. autoclass:: fairseq.data.GroupedIterator
56 |     :members:
57 | .. autoclass:: fairseq.data.ShardedIterator
58 |     :members:
59 | 


--------------------------------------------------------------------------------
/docs/docutils.conf:
--------------------------------------------------------------------------------
1 | [writers]
2 | option-limit=0
3 | 


--------------------------------------------------------------------------------
/docs/fairseq.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/docs/fairseq.gif


--------------------------------------------------------------------------------
/docs/fairseq_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/docs/fairseq_logo.png


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. fairseq documentation master file, created by
 2 |    sphinx-quickstart on Fri Aug 17 21:45:30 2018.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | :github_url: https://github.com/pytorch/fairseq
 7 | 
 8 | 
 9 | fairseq documentation
10 | =====================
11 | 
12 | Fairseq is a sequence modeling toolkit written in `PyTorch
13 | <http://pytorch.org/>`_ that allows researchers and developers to
14 | train custom models for translation, summarization, language modeling and other
15 | text generation tasks.
16 | 
17 | .. toctree::
18 |     :maxdepth: 1
19 |     :caption: Getting Started
20 | 
21 |     getting_started
22 |     command_line_tools
23 | 
24 | .. toctree::
25 |     :maxdepth: 1
26 |     :caption: Extending Fairseq
27 | 
28 |     overview
29 |     tutorial_simple_lstm
30 |     tutorial_classifying_names
31 | 
32 | .. toctree::
33 |     :maxdepth: 2
34 |     :caption: Library Reference
35 | 
36 |     tasks
37 |     models
38 |     criterions
39 |     optim
40 |     lr_scheduler
41 |     data
42 |     modules
43 | 
44 | 
45 | Indices and tables
46 | ==================
47 | 
48 | * :ref:`genindex`
49 | * :ref:`search`
50 | 


--------------------------------------------------------------------------------
/docs/lr_scheduler.rst:
--------------------------------------------------------------------------------
 1 | .. role:: hidden
 2 |     :class: hidden-section
 3 | 
 4 | .. _Learning Rate Schedulers:
 5 | 
 6 | Learning Rate Schedulers
 7 | ========================
 8 | 
 9 | Learning Rate Schedulers update the learning rate over the course of training.
10 | Learning rates can be updated after each update via :func:`step_update` or at
11 | epoch boundaries via :func:`step`.
12 | 
13 | .. automodule:: fairseq.optim.lr_scheduler
14 |     :members:
15 | 
16 | .. autoclass:: fairseq.optim.lr_scheduler.FairseqLRScheduler
17 |     :members:
18 |     :undoc-members:
19 | 
20 | .. autoclass:: fairseq.optim.lr_scheduler.cosine_lr_scheduler.CosineSchedule
21 |     :members:
22 |     :undoc-members:
23 | .. autoclass:: fairseq.optim.lr_scheduler.fixed_schedule.FixedSchedule
24 |     :members:
25 |     :undoc-members:
26 | .. autoclass:: fairseq.optim.lr_scheduler.inverse_square_root_schedule.InverseSquareRootSchedule
27 |     :members:
28 |     :undoc-members:
29 | .. autoclass:: fairseq.optim.lr_scheduler.reduce_lr_on_plateau.ReduceLROnPlateau
30 |     :members:
31 |     :undoc-members:
32 | .. autoclass:: fairseq.optim.lr_scheduler.triangular_lr_scheduler.TriangularSchedule
33 |     :members:
34 |     :undoc-members:
35 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=python -msphinx
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=fairseq
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The Sphinx module was not found. Make sure you have Sphinx installed,
20 | 	echo.then set the SPHINXBUILD environment variable to point to the full
21 | 	echo.path of the 'sphinx-build' executable. Alternatively you may add the
22 | 	echo.Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
 1 | Modules
 2 | =======
 3 | 
 4 | Fairseq provides several stand-alone :class:`torch.nn.Module` classes that may
 5 | be helpful when implementing a new :class:`~fairseq.models.BaseFairseqModel`.
 6 | 
 7 | .. automodule:: fairseq.modules
 8 |     :members:
 9 |     :undoc-members:
10 | 


--------------------------------------------------------------------------------
/docs/optim.rst:
--------------------------------------------------------------------------------
 1 | .. role:: hidden
 2 |     :class: hidden-section
 3 | 
 4 | .. _optimizers:
 5 | 
 6 | Optimizers
 7 | ==========
 8 | 
 9 | Optimizers update the Model parameters based on the gradients.
10 | 
11 | .. automodule:: fairseq.optim
12 |     :members:
13 | 
14 | .. autoclass:: fairseq.optim.FairseqOptimizer
15 |     :members:
16 |     :undoc-members:
17 | 
18 | .. autoclass:: fairseq.optim.adadelta.Adadelta
19 |     :members:
20 |     :undoc-members:
21 | .. autoclass:: fairseq.optim.adagrad.Adagrad
22 |     :members:
23 |     :undoc-members:
24 | .. autoclass:: fairseq.optim.adafactor.FairseqAdafactor
25 |     :members:
26 |     :undoc-members:
27 | .. autoclass:: fairseq.optim.adam.FairseqAdam
28 |     :members:
29 |     :undoc-members:
30 | .. autoclass:: fairseq.optim.fp16_optimizer.FP16Optimizer
31 |     :members:
32 |     :undoc-members:
33 | .. autoclass:: fairseq.optim.nag.FairseqNAG
34 |     :members:
35 |     :undoc-members:
36 | .. autoclass:: fairseq.optim.sgd.SGD
37 |     :members:
38 |     :undoc-members:
39 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx<2.0
2 | sphinx-argparse
3 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | !*/*.sh
2 | !*/*.md
3 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | __version__ = '0.9.0'
7 | 
8 | import examples.noisychannel  # noqa
9 | 


--------------------------------------------------------------------------------
/examples/backtranslation/deduplicate_lines.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import argparse
 8 | import fileinput
 9 | import hashlib
10 | from multiprocessing import Pool
11 | import sys
12 | 
13 | 
def get_hashes_and_lines(raw_line):
    """Return ``(md5_hexdigest, raw_line)`` for one line of input bytes.

    The line is handed back unchanged alongside its digest so the caller can
    test the digest for duplicates and still write out the original bytes.
    """
    digest = hashlib.md5(raw_line).hexdigest()
    return digest, raw_line
17 | 
18 | 
def main():
    """Write each distinct input line to stdout once, dropping duplicates.

    Lines are read as bytes from the files named on the command line (or from
    stdin when none are given). MD5 digests are computed in a pool of
    ``--workers`` processes, and a line is written only the first time its
    digest is seen. Because results are consumed with ``imap_unordered``, the
    output order is not guaranteed to match the input order.

    Progress goes to stderr: the running count every 1,000,000 lines and a
    dot every 100,000 lines.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--workers', type=int, default=10)
    parser.add_argument('files', nargs='*', help='input files')
    args = parser.parse_args()

    seen = set()
    # Manage the pool with ``with`` so the worker processes are always torn
    # down, even if reading or writing fails part-way through.
    with fileinput.input(args.files, mode='rb') as h, \
            Pool(args.workers) as pool:
        results = pool.imap_unordered(get_hashes_and_lines, h, 1000)
        for i, (digest, raw_line) in enumerate(results):
            if digest not in seen:
                seen.add(digest)
                sys.stdout.buffer.write(raw_line)
            if i % 1000000 == 0:
                print(i, file=sys.stderr, end="", flush=True)
            elif i % 100000 == 0:
                print(".", file=sys.stderr, end="", flush=True)
    print(file=sys.stderr, flush=True)
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     main()
42 | 


--------------------------------------------------------------------------------
/examples/backtranslation/sacrebleu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 5 ]; then
 4 |     echo "usage: $0 [dataset=wmt14/full] [langpair=en-de] [databin] [bpecode] [model]"
 5 |     exit
 6 | fi
 7 | 
 8 | 
 9 | DATASET=$1
10 | LANGPAIR=$2
11 | DATABIN=$3
12 | BPECODE=$4
13 | MODEL=$5
14 | 
15 | SRCLANG=$(echo $LANGPAIR | cut -d '-' -f 1)
16 | TGTLANG=$(echo $LANGPAIR | cut -d '-' -f 2)
17 | 
18 | 
19 | BPEROOT=examples/backtranslation/subword-nmt/subword_nmt
20 | if [ ! -e $BPEROOT ]; then
21 |     BPEROOT=subword-nmt/subword_nmt
22 |     if [ ! -e $BPEROOT ]; then
23 |         echo 'Cloning Subword NMT repository (for BPE pre-processing)...'
24 |         git clone https://github.com/rsennrich/subword-nmt.git
25 |     fi
26 | fi
27 | 
28 | 
29 | sacrebleu -t $DATASET -l $LANGPAIR --echo src \
30 | | sacremoses tokenize -a -l $SRCLANG -q \
31 | | python $BPEROOT/apply_bpe.py -c $BPECODE \
32 | | fairseq-interactive $DATABIN --path $MODEL \
33 |     -s $SRCLANG -t $TGTLANG \
34 |     --beam 5 --remove-bpe --buffer-size 1024 --max-tokens 8000 \
35 | | grep ^H- | cut -f 3- \
36 | | sacremoses detokenize -l $TGTLANG -q \
37 | | sacrebleu -t $DATASET -l $LANGPAIR
38 | 


--------------------------------------------------------------------------------
/examples/backtranslation/tokenized_bleu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 5 ]; then
 4 |     echo "usage: $0 [dataset=wmt14/full] [langpair=en-de] [databin] [bpecode] [model]"
 5 |     exit
 6 | fi
 7 | 
 8 | 
 9 | DATASET=$1
10 | LANGPAIR=$2
11 | DATABIN=$3
12 | BPECODE=$4
13 | MODEL=$5
14 | 
15 | SRCLANG=$(echo $LANGPAIR | cut -d '-' -f 1)
16 | TGTLANG=$(echo $LANGPAIR | cut -d '-' -f 2)
17 | 
18 | 
19 | BPEROOT=examples/backtranslation/subword-nmt/subword_nmt
20 | if [ ! -e $BPEROOT ]; then
21 |     BPEROOT=subword-nmt/subword_nmt
22 |     if [ ! -e $BPEROOT ]; then
23 |         echo 'Cloning Subword NMT repository (for BPE pre-processing)...'
24 |         git clone https://github.com/rsennrich/subword-nmt.git
25 |     fi
26 | fi
27 | 
28 | 
29 | TMP_REF=$(mktemp)
30 | 
31 | sacrebleu -t $DATASET -l $LANGPAIR --echo ref -q \
32 | | sacremoses normalize -l $TGTLANG -q \
33 | | sacremoses tokenize -a -l $TGTLANG -q \
34 | > $TMP_REF
35 | 
36 | sacrebleu -t $DATASET -l $LANGPAIR --echo src -q \
37 | | sacremoses normalize -l $SRCLANG -q \
38 | | sacremoses tokenize -a -l $SRCLANG -q \
39 | | python $BPEROOT/apply_bpe.py -c $BPECODE \
40 | | fairseq-interactive $DATABIN --path $MODEL \
41 |     -s $SRCLANG -t $TGTLANG \
42 |     --beam 5 --remove-bpe --buffer-size 1024 --max-tokens 8000 \
43 | | grep ^H- | cut -f 3- \
44 | | fairseq-score --ref $TMP_REF
45 | 
46 | rm -f $TMP_REF
47 | 


--------------------------------------------------------------------------------
/examples/constrained_decoding/normalize.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # Copyright (c) Facebook, Inc. and its affiliates.
 4 | #
 5 | # This source code is licensed under the MIT license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import sys
 9 | 
10 | from sacremoses.normalize import MosesPunctNormalizer
11 | 
12 | 
def main(args):
    """Punctuation-normalize every line of stdin and print it to stdout.

    ``args.lang`` and ``args.penn`` are forwarded to ``MosesPunctNormalizer``.
    Trailing whitespace is stripped from each line before normalization, and
    output is flushed after every line.
    """
    punct_normalizer = MosesPunctNormalizer(lang=args.lang, penn=args.penn)
    for raw in sys.stdin:
        cleaned = punct_normalizer.normalize(raw.rstrip())
        print(cleaned, flush=True)
17 | 
18 | 
if __name__ == '__main__':
    import argparse

    # --lang/-l and --penn/-p are the two values main() hands to the
    # normalizer.
    cli = argparse.ArgumentParser()
    cli.add_argument('--lang', '-l', default='en')
    cli.add_argument('--penn', '-p', action='store_true')

    main(cli.parse_args())
27 | 


--------------------------------------------------------------------------------
/examples/constrained_decoding/tok.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # Copyright (c) Facebook, Inc. and its affiliates.
 4 | #
 5 | # This source code is licensed under the MIT license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import sys
 9 | import sacremoses
10 | 
11 | 
def main(args):
    """Tokenize each tab-separated field of every stdin line, keeping the tabs.

    Every field is tokenized on its own with a Moses tokenizer for
    ``args.lang``, and the fields are printed re-joined with tab characters.
    Output is flushed after every line.
    """
    tokenizer = sacremoses.MosesTokenizer(lang=args.lang)

    for line in sys.stdin:
        fields = [
            tokenizer.tokenize(field, return_str=True)
            for field in line.split("\t")
        ]
        print(*fields, sep="\t", flush=True)
21 | 
22 | 
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--lang', '-l', default='en')
    # NOTE: --penn and --fields are accepted on the command line, but main()
    # only reads args.lang.
    cli.add_argument('--penn', '-p', action='store_true')
    cli.add_argument('--fields', '-f', help="fields to tokenize")

    main(cli.parse_args())
32 | 


--------------------------------------------------------------------------------
/examples/language_model/README.conv.md:
--------------------------------------------------------------------------------
 1 | # Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)
 2 | 
 3 | ## Example usage
 4 | 
 5 | First download and preprocess the data following the main [language modeling README](README.md).
 6 | 
 7 | Then to train a convolutional LM using the `fconv_lm_dauphin_wikitext103`
 8 | architecture:
 9 | ```bash
10 | fairseq-train --task language_modeling \
11 |     data-bin/wikitext-103 \
12 |     --save-dir checkpoints/fconv_wikitext-103 \
13 |     --arch fconv_lm_dauphin_wikitext103 \
14 |     --adaptive-softmax-cutoff 10000,20000,200000 \
15 |     --dropout 0.2 \
16 |     --criterion adaptive_loss \
17 |     --optimizer nag --clip-norm 0.1 --weight-decay 5e-06 \
18 |     --lr 1.0 --lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \
19 |     --max-tokens 1024 --tokens-per-sample 1024 \
20 |     --ddp-backend no_c10d \
21 |     --max-epoch 35
22 | ```
23 | 
24 | And evaluate with:
25 | ```bash
26 | fairseq-eval-lm data-bin/wikitext-103 --path checkpoints/fconv_wikitext-103/checkpoint_best.pt
27 | ```
28 | 
29 | ## Citation
30 | 
31 | ```bibtex
32 | @inproceedings{dauphin2017language,
33 |   title={Language Modeling with Gated Convolutional Networks},
34 |   author={Dauphin, Yann N and Fan, Angela and Auli, Michael and Grangier, David},
35 |   booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70},
36 |   pages={933--941},
37 |   year={2017},
38 |   organization={JMLR}
39 | }
40 | ```
41 | 


--------------------------------------------------------------------------------
/examples/language_model/prepare-wikitext-103.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh
 3 | 
 4 | URLS=(
 5 |     "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip"
 6 | )
 7 | FILES=(
 8 |     "wikitext-103-v1.zip"
 9 | )
10 | 
11 | for ((i=0;i<${#URLS[@]};++i)); do
12 |     file=${FILES[i]}
13 |     if [ -f $file ]; then
14 |         echo "$file already exists, skipping download"
15 |     else
16 |         url=${URLS[i]}
17 |         wget "$url"
18 |         if [ -f $file ]; then
19 |             echo "$url successfully downloaded."
20 |         else
21 |             echo "$url not successfully downloaded."
22 |             exit -1
23 |         fi
24 |         if [ ${file: -4} == ".tgz" ]; then
25 |             tar zxvf $file
26 |         elif [ ${file: -4} == ".tar" ]; then
27 |             tar xvf $file
28 |         elif [ ${file: -4} == ".zip" ]; then
29 |             unzip $file
30 |         fi
31 |     fi
32 | done
33 | cd ..
34 | 


--------------------------------------------------------------------------------
/examples/megatron_11b/detok.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import argparse
 8 | import fileinput
 9 | import sacremoses
10 | 
11 | 
def main():
    """Detokenize the given files (or stdin) line by line and print the result.

    Each line is stripped, split on single spaces and passed through a Moses
    detokenizer. A fixed series of string replacements is then applied in
    order: ``' @'`` and ``'@ '`` are deleted, spaces next to ``'='`` are
    removed, and ``' – '`` is collapsed to ``'–'``.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('files', nargs='*', help='input files')
    args = parser.parse_args()

    detok = sacremoses.MosesDetokenizer()

    # (old, new) pairs applied sequentially; the order is significant.
    cleanups = (
        (' @', ''),
        ('@ ', ''),
        (' =', '='),
        ('= ', '='),
        (' – ', '–'),
    )

    for line in fileinput.input(args.files, openhook=fileinput.hook_compressed):
        text = detok.detokenize(line.strip().split(' '))
        for old, new in cleanups:
            text = text.replace(old, new)
        print(text)
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     main()
25 | 


--------------------------------------------------------------------------------
/examples/multilingual/finetune_multilingual_model.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | path_2_data=$1  # <path to data> which contains binarized data for each directions
 4 | lang_list=$2  # <path to a file which contains a list of languages separted by new lines>
 5 | lang_pairs=$3  #a list language pairs to train multilingual models, e.g. "en-fr,en-cs,fr-en,cs-en"
 6 | # pretrained can be an mBART pretrained model as well
 7 | pretrained_model=$4 #<path to a pretrained model>
 8 | 
 9 | 
10 | fairseq-train "$path_2_data" \
11 |   --encoder-normalize-before --decoder-normalize-before \
12 |   --arch transformer --layernorm-embedding \
13 |   --task translation_multi_simple_epoch \
14 |   --finetune-from-model "$pretrained_model" \
15 |   --sampling-method "temperature" \
16 |   --sampling-temperature "1.5" \
17 |   --encoder-langtok "src" \
18 |   --decoder-langtok \
19 |   --lang-dict "$lang_list" \
20 |   --lang-pairs "$lang_pairs" \
21 |   --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \
22 |   --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \
23 |   --lr-scheduler inverse_sqrt --lr 3e-05 --min-lr -1 --warmup-updates 2500 --max-update 40000 \
24 |   --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \
25 |   --max-tokens 1024 --update-freq 2 \
26 |   --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \
27 |   --seed 222 --log-format simple --log-interval 2
28 | 


--------------------------------------------------------------------------------
/examples/multilingual/multilingual_fairseq_gen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | lang_pairs="en-fr,en-cs,fr-en,cs-en"
 4 | path_2_data=$1 # <path to data>
 5 | lang_list=$2 # <path to a file which contains list of languages separted by new lines>
 6 | model=$3  # <path to a trained model>
 7 | source_lang=cs
 8 | target_lang=en
 9 | 
10 | fairseq-generate "$path_2_data" \
11 |   --path "$model" \
12 |   --task translation_multi_simple_epoch \
13 |   --gen-subset test \
14 |   --source-lang "$source_lang" \
15 |   --target-lang "$target_lang" \
16 |   --sacrebleu --remove-bpe 'sentencepiece'\
17 |   --max-sentences 32 \
18 |   --encoder-langtok "src" \
19 |   --decoder-langtok \
20 |   --lang-dict "$lang_list" \
21 |   --lang-pairs "$lang_pairs"
22 | 


--------------------------------------------------------------------------------
/examples/multilingual/train_multilingual_model.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | path_2_data=$1  # <path to data> which contains binarized data for each directions
 4 | lang_list=$2  # <path to a file which contains a list of languages separted by new lines>
 5 | lang_pairs=$3  #a list language pairs to train multilingual models, e.g. "en-fr,en-cs,fr-en,cs-en"
 6 | 
 7 | fairseq-train "$path_2_data" \
 8 |   --encoder-normalize-before --decoder-normalize-before \
 9 |   --arch transformer --layernorm-embedding \
10 |   --task translation_multi_simple_epoch \
11 |   --sampling-method "temperature" \
12 |   --sampling-temperature 1.5 \
13 |   --encoder-langtok "src" \
14 |   --decoder-langtok \
15 |   --lang-dict "$lang_list" \
16 |   --lang-pairs "$lang_pairs" \
17 |   --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \
18 |   --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \
19 |   --lr-scheduler inverse_sqrt --lr 3e-05 --min-lr -1 --warmup-updates 2500 --max-update 40000 \
20 |   --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \
21 |   --max-tokens 1024 --update-freq 2 \
22 |   --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \
23 |   --seed 222 --log-format simple --log-interval 2
24 | 


--------------------------------------------------------------------------------
/examples/noisychannel/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .rerank_options import *  # noqa
7 | 


--------------------------------------------------------------------------------
/examples/quant_noise/transformer_quantization_config.yaml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | # This file defines example configuration arguments for quantizing
 7 | # a transformer model with product quantization
 8 | 
 9 | # Number of Centroids for Product Quantization, by default 256 (byte-aligned)
10 | n_centroids:
11 |     Linear:
12 |         key: in_features
13 |         value: {"*": 256}
14 |     Embedding:
15 |         key: embedding_dim
16 |         value: {"*": 256}
17 | 
18 | # Block Sizes for Product Quantization
19 | # We suggest: 8 for FFN, 4 for ATTN, 4 for embedding projections, 8 for embeddings
20 | block_sizes:
21 |   Linear:
22 |       key: fuzzy_name
23 |       value: {fc: 8, attn: 4, emb: 4}
24 |   Embedding:
25 |       key: fuzzy_name
26 |       value: {emb: 8}
27 | 
28 | # Layers to Quantize Sequentially
29 | # We suggest: first FFN, then EMB, then ATTN
30 | layers_to_quantize:
31 |     - decoder\\.layers\\.\d+\\.fc[12]
32 |     - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
33 |     - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
34 | 


--------------------------------------------------------------------------------
/examples/roberta/commonsense_qa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import commonsense_qa_task  # noqa
7 | 


--------------------------------------------------------------------------------
/examples/roberta/commonsense_qa/download_cqa_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | OUTDIR=data/CommonsenseQA
 8 | 
 9 | mkdir -p $OUTDIR
10 | 
11 | wget -O $OUTDIR/train.jsonl https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl
12 | wget -O $OUTDIR/valid.jsonl https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl
13 | wget -O $OUTDIR/test.jsonl https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl
14 | wget -O $OUTDIR/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt
15 | 


--------------------------------------------------------------------------------
/examples/roberta/wsc/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import wsc_criterion  # noqa
7 | from . import wsc_task  # noqa
8 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import criterions, models, eval  # noqa
7 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | for file in os.listdir(os.path.dirname(__file__)):
10 |     if file.endswith(".py") and not file.startswith("_"):
11 |         criterion_name = file[: file.find(".py")]
12 |         importlib.import_module(
13 |             "examples.simultaneous_translation.criterions." + criterion_name
14 |         )
15 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/eval/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/eval/agents/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | from fairseq import registry
 9 | 
# Registry triple produced by fairseq's registry for the '--agent-type' option.
build_agent, register_agent, MONOTONIC_AGENT = registry.setup_registry('--agent-type')


DEFAULT_EOS = '</s>'
GET = 0
SEND = 1

# Import every public (non-underscore) Python module in this directory.
# NOTE(review): modules are imported under the top-level name 'agents', so the
# parent directory presumably has to be on sys.path -- confirm with callers.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('agents.' + module)
21 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/eval/scorers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | from fairseq import registry
 9 | (
10 |     build_scorer,
11 |     register_scorer,
12 |     SCORER_REGISTRIES
13 | ) = registry.setup_registry('--scorer-type')
14 | 
15 | for file in os.listdir(os.path.dirname(__file__)):
16 |     if file.endswith('.py') and not file.startswith('_'):
17 |         module = file[:file.find('.py')]
18 |         importlib.import_module('scorers.' + module)
19 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/eval/scorers/text_scorer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . scorer import SimulScorer
 7 | from . import register_scorer
 8 | 
 9 | 
@register_scorer("text")
class SimulTextScorer(SimulScorer):
    """Scorer registered as ``"text"`` that hands out source text segment by segment."""

    def __init__(self, args):
        super().__init__(args)
        # The source file is loaded with split=True, the target with split=False.
        self.data = {
            "src": self._load_text_file(args.src_file, split=True),
            "tgt": self._load_text_file(args.tgt_file, split=False)
        }

    def send_src(self, sent_id, *args):
        """Return the next source segment of sentence ``sent_id`` and advance its step.

        Once all segments of the sentence have been handed out, ``self.eos`` is
        returned as the segment and the step counter is pinned to
        ``len(source) + 1``.
        """
        step = self.steps[sent_id]
        source = self.data["src"][sent_id]

        if step < len(source):
            reply = {
                "sent_id": sent_id,
                "segment_id": step,
                "segment": source[step]
            }
            self.steps[sent_id] += 1
            return reply

        reply = {
            "sent_id": sent_id,
            "segment_id": step,
            "segment": self.eos
        }
        # Consider EOS
        self.steps[sent_id] = len(source) + 1
        return reply

    def src_lengths(self):
        """Return the length of every source sentence, counting one extra for EOS."""
        lengths = []
        for sent in self.data["src"]:
            # +1 for eos
            lengths.append(len(sent) + 1)
        return lengths
42 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | for file in os.listdir(os.path.dirname(__file__)):
10 |     if file.endswith('.py') and not file.startswith('_'):
11 |         model_name = file[:file.find('.py')]
12 |         importlib.import_module('examples.simultaneous_translation.models.' + model_name)
13 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
# Registry triple produced by fairseq's registry for the '--simul-type' option.
(
    build_monotonic_attention,
    register_monotonic_attention,
    MONOTONIC_ATTENTION_REGISTRY,
) = registry.setup_registry('--simul-type')

# Import every public (non-underscore) Python module that sits next to this
# file, under its fully-qualified package name.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    model_name = file[:file.find('.py')]
    importlib.import_module('examples.simultaneous_translation.modules.' + model_name)
20 | 


--------------------------------------------------------------------------------
/examples/simultaneous_translation/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
# automatically import any public Python files in the utils/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('examples.simultaneous_translation.utils.' + module)
15 | 


--------------------------------------------------------------------------------
/examples/speech_recognition/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tasks, criterions, models  # noqa
2 | 


--------------------------------------------------------------------------------
/examples/speech_recognition/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | import os
 3 | 
 4 | 
 5 | # ASG loss requires wav2letter
 6 | files_to_skip = set()
 7 | try:
 8 |     import wav2letter
 9 | except ImportError:
10 |     files_to_skip.add("ASG_loss.py")
11 | 
12 | for file in os.listdir(os.path.dirname(__file__)):
13 |     if file.endswith(".py") and not file.startswith("_") and file not in files_to_skip:
14 |         criterion_name = file[: file.find(".py")]
15 |         importlib.import_module(
16 |             "examples.speech_recognition.criterions." + criterion_name
17 |         )
18 | 


--------------------------------------------------------------------------------
/examples/speech_recognition/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .asr_dataset import AsrDataset
 7 | 
 8 | __all__ = [
 9 |     'AsrDataset',
10 | ]
11 | 


--------------------------------------------------------------------------------
/examples/speech_recognition/models/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import os
3 | 
# Import every public (non-underscore) Python module that sits next to this
# file, under its fully-qualified package name.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    model_name = file[:file.find('.py')]
    importlib.import_module('examples.speech_recognition.models.' + model_name)
8 | 


--------------------------------------------------------------------------------
/examples/speech_recognition/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import os
3 | 
# Import every public (non-underscore) Python module that sits next to this
# file, under its fully-qualified package name.
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    task_name = file[:file.find('.py')]
    importlib.import_module('examples.speech_recognition.tasks.' + task_name)
8 | 


--------------------------------------------------------------------------------
/examples/translation_moe/src/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import translation_moe  # noqa
7 | 


--------------------------------------------------------------------------------
/examples/translation_moe/src/logsumexp_moe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | class LogSumExpMoE(torch.autograd.Function):
10 |     """Standard LogSumExp forward pass, but use *posterior* for the backward.
11 | 
12 |     See `"Mixture Models for Diverse Machine Translation: Tricks of the Trade"
13 |     (Shen et al., 2019) <https://arxiv.org/abs/1902.07816>`_.
14 |     """
15 | 
16 |     @staticmethod
17 |     def forward(ctx, logp, posterior, dim=-1):
18 |         ctx.save_for_backward(posterior)
19 |         ctx.dim = dim
20 |         return torch.logsumexp(logp, dim=dim)
21 | 
22 |     @staticmethod
23 |     def backward(ctx, grad_output):
24 |         posterior, = ctx.saved_tensors
25 |         grad_logp = grad_output.unsqueeze(ctx.dim) * posterior
26 |         return grad_logp, None, None
27 | 


--------------------------------------------------------------------------------
/examples/unsupervised_quality_estimation/aggregate_scores.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import argparse
 7 | import numpy as np
 8 | import sys
 9 | 
10 | 
11 | aggregate_funcs = {
12 |     'std': np.std,
13 |     'var': np.var,
14 |     'median': np.median,
15 |     'mean': np.mean,
16 |     'min': np.min,
17 |     'max': np.max,
18 | }
19 | 
20 | 
def line2probs(_line):
    """Turn a line of whitespace-separated log-probabilities into a list of probabilities."""
    probs = []
    for token in _line.split():
        probs.append(np.exp(float(token)))
    return probs
23 | 
24 | 
def main():
    """Aggregate per-position probabilities over groups of repeated segments.

    Each line of ``--input_file`` holds whitespace-separated log-probabilities.
    Every ``--repeat_times`` consecutive lines form one group: the lines are
    transposed position-wise, ``--func`` is applied to each position, and the
    results are written as one ``{:.4f}``-formatted line to ``--output_file``
    (stdout when omitted). A trailing group with fewer than ``--repeat_times``
    lines produces no output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', required=True, type=str)
    parser.add_argument('-n', '--repeat_times', required=True, type=int)
    parser.add_argument('-o', '--output_file', required=False)
    # Reject unknown aggregation names at parse time instead of failing with a
    # KeyError in the middle of processing.
    parser.add_argument('-f', '--func', required=False, default='mean',
                        choices=sorted(aggregate_funcs))
    args = parser.parse_args()

    stream = open(args.output_file, 'w') if args.output_file else sys.stdout
    try:
        aggregate = aggregate_funcs[args.func]
        with open(args.input_file) as input_file:
            segment_scores = []
            for line in input_file:
                segment_scores.append(line2probs(line))
                if len(segment_scores) == args.repeat_times:
                    # zip(*...) regroups scores by position and stops at the
                    # shortest line of the group.
                    res_scores = [
                        '{:.4f}'.format(aggregate(x)) for x in zip(*segment_scores)
                    ]
                    stream.write(' '.join(res_scores) + '\n')
                    segment_scores = []
    finally:
        # Close (and thereby flush) the output file; never close stdout.
        if stream is not sys.stdout:
            stream.close()
43 | 
44 | 
45 | if __name__ == '__main__':
46 |     main()
47 | 


--------------------------------------------------------------------------------
/examples/unsupervised_quality_estimation/repeat_lines.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import argparse
 7 | import sys
 8 | 
 9 | 
10 | def _normalize_spaces(line):
11 |     return ' '.join(line.split())
12 | 
13 | 
def main():
    """Write every input line ``--repeat_times`` times with normalized whitespace.

    Lines are read from ``--input_file`` (UTF-8). Output goes to
    ``--output_file`` (UTF-8) or to stdout when no output file is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', required=True, type=str)
    parser.add_argument('-n', '--repeat_times', required=True, type=int)
    parser.add_argument('-o', '--output_file', required=False, type=str)
    args = parser.parse_args()
    stream = open(args.output_file, 'w', encoding='utf-8') if args.output_file else sys.stdout

    try:
        with open(args.input_file, 'r', encoding='utf-8') as input_file:
            for line in input_file:
                # Normalize once per line rather than once per repetition.
                normalized = _normalize_spaces(line) + '\n'
                for _ in range(args.repeat_times):
                    stream.write(normalized)
    finally:
        # Close (and thereby flush) the output file; never close stdout.
        if stream is not sys.stdout:
            stream.close()
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     main()
29 | 


--------------------------------------------------------------------------------
/fairseq/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | __all__ = ['pdb']
 7 | __version__ = '0.9.0'
 8 | 
 9 | import sys
10 | 
11 | # backwards compatibility to support `from fairseq.meters import AverageMeter`
12 | from fairseq.logging import meters, metrics, progress_bar  # noqa
13 | sys.modules['fairseq.meters'] = meters
14 | sys.modules['fairseq.metrics'] = metrics
15 | sys.modules['fairseq.progress_bar'] = progress_bar
16 | 
17 | import fairseq.criterions  # noqa
18 | import fairseq.models  # noqa
19 | import fairseq.modules  # noqa
20 | import fairseq.optim  # noqa
21 | import fairseq.optim.lr_scheduler  # noqa
22 | import fairseq.pdb  # noqa
23 | import fairseq.scoring  # noqa
24 | import fairseq.tasks  # noqa
25 | import fairseq.token_generation_constraints  # noqa
26 | 
27 | import fairseq.benchmark  # noqa
28 | import fairseq.model_parallel  # noqa
29 | 


--------------------------------------------------------------------------------
/fairseq/benchmark/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | # import models/tasks to register them
 7 | from . import (  # noqa
 8 |     dummy_lm,
 9 |     dummy_masked_lm,
10 |     dummy_model,
11 |     dummy_mt,
12 | )
13 | 


--------------------------------------------------------------------------------
/fairseq/clib/libbleu/module.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2017-present, Facebook, Inc.
 3 |  * All rights reserved.
 4 |  *
 5 |  * This source code is licensed under the license found in the
 6 |  * LICENSE file in the root directory of this source tree.
 7 |  */
 8 | 
 9 | #include <Python.h>
10 | 
11 | 
12 | static PyMethodDef method_def[] = {
13 |   {NULL, NULL, 0, NULL}
14 | };
15 | 
16 | static struct PyModuleDef module_def = {
17 |    PyModuleDef_HEAD_INIT,
18 |    "libbleu",   /* name of module */
19 |    NULL,     /* module documentation, may be NULL */
20 |    -1,       /* size of per-interpreter state of the module,
21 |                 or -1 if the module keeps state in global variables. */
22 |    method_def
23 | };
24 | 
25 | 
/*
 * Module initialization function: creates the `libbleu` module object from
 * `module_def` and returns it, or NULL if creation fails. The function name
 * is chosen at compile time from PY_MAJOR_VERSION.
 */
#if PY_MAJOR_VERSION == 2
PyMODINIT_FUNC init_libbleu()
#else
PyMODINIT_FUNC PyInit_libbleu()
#endif
{
  PyObject *m = PyModule_Create(&module_def);
  /* Hand a creation failure back to the caller as NULL. */
  if (!m) {
    return NULL;
  }
  return m;
}
38 | 


--------------------------------------------------------------------------------
/fairseq/clib/libnat_cuda/edit_dist.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2017-present, Facebook, Inc.
 3 |  * All rights reserved.
 4 |  *
 5 |  * This source code is licensed under the license found in the
 6 |  * LICENSE file in the root directory of this source tree.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <torch/extension.h>
12 | 
13 | torch::Tensor LevenshteinDistanceCuda(
14 |         torch::Tensor source,
15 |         torch::Tensor target,
16 |         torch::Tensor source_length,
17 |         torch::Tensor target_length);
18 | 
19 | torch::Tensor GenerateDeletionLabelCuda(
20 |         torch::Tensor source,
21 |         torch::Tensor operations);
22 | 
23 | std::pair<torch::Tensor, torch::Tensor> GenerateInsertionLabelCuda(
24 |         torch::Tensor source,
25 |         torch::Tensor operations);
26 | 


--------------------------------------------------------------------------------
/fairseq/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.criterions.fairseq_criterion import FairseqCriterion, LegacyFairseqCriterion
11 | 
12 | 
# Registry triple for the '--criterion' option: entries derive from
# FairseqCriterion and 'cross_entropy' is the default.
build_criterion, register_criterion, CRITERION_REGISTRY = registry.setup_registry(
    '--criterion',
    base_class=FairseqCriterion,
    default='cross_entropy',
)


# automatically import any public Python files in the criterions/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('fairseq.criterions.' + module)
25 | 


--------------------------------------------------------------------------------
/fairseq/data/append_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
class AppendTokenDataset(BaseWrapperDataset):
    """Wrapper that appends ``token`` to every item; a no-op when ``token`` is None."""

    def __init__(self, dataset, token=None):
        super().__init__(dataset)
        self.token = token
        # Each item grows by one position when a token is appended.
        self._sizes = dataset.sizes if token is None else np.array(dataset.sizes) + 1

    def __getitem__(self, idx):
        item = self.dataset[idx]
        if self.token is None:
            return item
        # item.new(...) builds the one-element tail from the item itself.
        return torch.cat([item, item.new([self.token])])

    @property
    def sizes(self):
        return self._sizes

    def num_tokens(self, index):
        """Token count of the wrapped item, plus one when a token is appended."""
        n = self.dataset.num_tokens(index)
        if self.token is None:
            return n
        n += 1
        return n

    def size(self, index):
        """Size of the wrapped item, plus one when a token is appended."""
        n = self.dataset.size(index)
        if self.token is None:
            return n
        n += 1
        return n
43 | 


--------------------------------------------------------------------------------
/fairseq/data/audio/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/data/audio/__init__.py


--------------------------------------------------------------------------------
/fairseq/data/colorize_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
class ColorizeDataset(BaseWrapperDataset):
    """ Adds 'colors' property to net input that is obtained from the provided color getter for use by models """

    def __init__(self, dataset, color_getter):
        super().__init__(dataset)
        self.color_getter = color_getter

    def collater(self, samples):
        """Collate via the parent class, then attach one color per sample.

        An empty parent batch is returned unchanged. Otherwise
        ``color_getter(dataset, sample_id)`` is called for every sample and
        the results are stored as a long tensor under
        ``batch["net_input"]["colors"]``.
        """
        batch = super().collater(samples)
        if len(batch) == 0:
            return batch
        colors = [self.color_getter(self.dataset, s["id"]) for s in samples]
        batch["net_input"]["colors"] = torch.tensor(colors, dtype=torch.long)
        return batch
25 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | import importlib
 8 | import os
 9 | 
10 | from fairseq import registry
11 | 
12 | 
# Registry triple for the '--tokenizer' option (no default).
build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry(
    '--tokenizer',
    default=None,
)


# Registry triple for the '--bpe' option (no default).
build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry(
    '--bpe',
    default=None,
)


# automatically import any public Python files in the encoders/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.startswith('_') or not file.endswith('.py'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('fairseq.data.encoders.' + module)
30 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/byte_bpe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq import file_utils
 8 | from fairseq.data.encoders import register_bpe
 9 | from fairseq.data.encoders.byte_utils import (byte_encode, smart_byte_decode,
10 |                                               SPACE, SPACE_ESCAPE)
11 | 
12 | 
@register_bpe('byte_bpe')
class ByteBPE(object):
    """BPE registered as ``'byte_bpe'``: byte-encode the text, then split it with SentencePiece."""

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument('--sentencepiece-model-path', type=str,
                            help='path to sentencepiece model')
        # fmt: on

    def __init__(self, args):
        model_path = file_utils.cached_path(args.sentencepiece_model_path)
        try:
            # Imported lazily so the dependency is only needed when this BPE
            # is actually used.
            import sentencepiece as spm
            self.sp = spm.SentencePieceProcessor()
            self.sp.Load(model_path)
        except ImportError:
            raise ImportError('Please install sentencepiece with: pip install sentencepiece')

    def encode(self, x: str) -> str:
        """Byte-encode ``x`` and return its SentencePiece pieces joined by ``SPACE``."""
        pieces = self.sp.EncodeAsPieces(byte_encode(x))
        return SPACE.join(pieces)

    @staticmethod
    def decode(x: str) -> str:
        """Drop the ``SPACE`` separators, restore escaped spaces, then byte-decode."""
        without_separators = x.replace(SPACE, '')
        restored = without_separators.replace(SPACE_ESCAPE, SPACE)
        return smart_byte_decode(restored)
39 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/bytes.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq.data.encoders import register_bpe
 8 | from fairseq.data.encoders.byte_utils import (byte_encode, smart_byte_decode,
 9 |                                               SPACE, SPACE_ESCAPE)
10 | 
11 | 
12 | @register_bpe('bytes')
13 | class Bytes(object):
14 |     def __init__(self, args):
15 |         pass
16 | 
17 |     @staticmethod
18 |     def add_args(parser):
19 |         pass
20 | 
21 |     @staticmethod
22 |     def encode(x: str) -> str:
23 |         encoded = byte_encode(x)
24 |         escaped = encoded.replace(SPACE, SPACE_ESCAPE)
25 |         return SPACE.join(list(escaped))
26 | 
27 |     @staticmethod
28 |     def decode(x: str) -> str:
29 |         unescaped = x.replace(SPACE, '').replace(SPACE_ESCAPE, SPACE)
30 |         return smart_byte_decode(unescaped)
31 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/characters.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from fairseq.data.encoders import register_bpe
 8 | 
 9 | SPACE = chr(32)
10 | SPACE_ESCAPE = chr(9601)
11 | 
12 | 
13 | @register_bpe('characters')
14 | class Characters(object):
15 |     def __init__(self, args):
16 |         pass
17 | 
18 |     @staticmethod
19 |     def add_args(parser):
20 |         pass
21 | 
22 |     @staticmethod
23 |     def encode(x: str) -> str:
24 |         escaped = x.replace(SPACE, SPACE_ESCAPE)
25 |         return SPACE.join(list(escaped))
26 | 
27 |     @staticmethod
28 |     def decode(x: str) -> str:
29 |         return x.replace(SPACE, '').replace(SPACE_ESCAPE, SPACE)
30 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/fastbpe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq import file_utils
 7 | from fairseq.data.encoders import register_bpe
 8 | 
 9 | 
10 | @register_bpe('fastbpe')
11 | class fastBPE(object):
12 | 
13 |     @staticmethod
14 |     def add_args(parser):
15 |         # fmt: off
16 |         parser.add_argument('--bpe-codes', type=str,
17 |                             help='path to fastBPE BPE')
18 |         # fmt: on
19 | 
20 |     def __init__(self, args):
21 |         if args.bpe_codes is None:
22 |             raise ValueError('--bpe-codes is required for --bpe=fastbpe')
23 |         codes = file_utils.cached_path(args.bpe_codes)
24 |         try:
25 |             import fastBPE
26 |             self.bpe = fastBPE.fastBPE(codes)
27 |             self.bpe_symbol = "@@ "
28 |         except ImportError:
29 |             raise ImportError('Please install fastBPE with: pip install fastBPE')
30 | 
31 |     def encode(self, x: str) -> str:
32 |         return self.bpe.apply([x])[0]
33 | 
34 |     def decode(self, x: str) -> str:
35 |         return (x + ' ').replace(self.bpe_symbol, '').rstrip()
36 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/nltk_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data.encoders import register_tokenizer
 7 | 
 8 | 
 9 | @register_tokenizer('nltk')
10 | class NLTKTokenizer(object):
11 | 
12 |     def __init__(self, source_lang=None, target_lang=None):
13 |         try:
14 |             from nltk.tokenize import word_tokenize
15 |             self.word_tokenize = word_tokenize
16 |         except ImportError:
17 |             raise ImportError('Please install nltk with: pip install nltk')
18 | 
19 |     def encode(self, x: str) -> str:
20 |         return ' '.join(self.word_tokenize(x))
21 | 
22 |     def decode(self, x: str) -> str:
23 |         return x
24 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/space_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import re
 7 | 
 8 | from fairseq.data.encoders import register_tokenizer
 9 | 
10 | 
11 | @register_tokenizer('space')
12 | class SpaceTokenizer(object):
13 | 
14 |     def __init__(self, source_lang=None, target_lang=None):
15 |         self.space_tok = re.compile(r"\s+")
16 | 
17 |     def encode(self, x: str) -> str:
18 |         return self.space_tok.sub(' ', x)
19 | 
20 |     def decode(self, x: str) -> str:
21 |         return x
22 | 


--------------------------------------------------------------------------------
/fairseq/data/encoders/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | from fairseq.data import encoders
 8 | 
 9 | 
10 | def get_whole_word_mask(args, dictionary):
11 |     bpe = encoders.build_bpe(args)
12 |     if bpe is not None:
13 |         def is_beginning_of_word(i):
14 |             if i < dictionary.nspecial:
15 |                 # special elements are always considered beginnings
16 |                 return True
17 |             tok = dictionary[i]
18 |             if tok.startswith('madeupword'):
19 |                 return True
20 |             try:
21 |                 return bpe.is_beginning_of_word(tok)
22 |             except ValueError:
23 |                 return True
24 |         mask_whole_words = torch.ByteTensor(list(
25 |             map(is_beginning_of_word, range(len(dictionary)))
26 |         ))
27 |         return mask_whole_words
28 |     return None
29 | 


--------------------------------------------------------------------------------
/fairseq/data/id_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import FairseqDataset
 9 | 
10 | 
11 | class IdDataset(FairseqDataset):
12 | 
13 |     def __getitem__(self, index):
14 |         return index
15 | 
16 |     def __len__(self):
17 |         return 0
18 | 
19 |     def collater(self, samples):
20 |         return torch.tensor(samples)
21 | 


--------------------------------------------------------------------------------
/fairseq/data/legacy/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .masked_lm_dictionary import BertDictionary, MaskedLMDictionary
 7 | from .block_pair_dataset import BlockPairDataset
 8 | from .masked_lm_dataset import MaskedLMDataset
 9 | 
10 | __all__ = [
11 |     'BertDictionary',
12 |     'BlockPairDataset',
13 |     'MaskedLMDataset',
14 |     'MaskedLMDictionary',
15 | ]
16 | 


--------------------------------------------------------------------------------
/fairseq/data/list_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class ListDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, sizes=None):
12 |         super().__init__(dataset)
13 |         self._sizes = sizes
14 | 
15 |     def __iter__(self):
16 |         for x in self.dataset:
17 |             yield x
18 | 
19 |     def collater(self, samples):
20 |         return samples
21 | 
22 |     @property
23 |     def sizes(self):
24 |         return self._sizes
25 | 
26 |     def num_tokens(self, index):
27 |         return self.sizes[index]
28 | 
29 |     def size(self, index):
30 |         return self.sizes[index]
31 | 
32 |     def set_epoch(self, epoch):
33 |         pass
34 | 


--------------------------------------------------------------------------------
/fairseq/data/lru_cache_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from functools import lru_cache
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
11 | class LRUCacheDataset(BaseWrapperDataset):
12 | 
13 |     def __init__(self, dataset, token=None):
14 |         super().__init__(dataset)
15 | 
16 |     @lru_cache(maxsize=8)
17 |     def __getitem__(self, index):
18 |         return self.dataset[index]
19 | 
20 |     @lru_cache(maxsize=8)
21 |     def collater(self, samples):
22 |         return self.dataset.collater(samples)
23 | 


--------------------------------------------------------------------------------
/fairseq/data/multilingual/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 


--------------------------------------------------------------------------------
/fairseq/data/num_samples_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import FairseqDataset
 7 | 
 8 | 
 9 | class NumSamplesDataset(FairseqDataset):
10 | 
11 |     def __getitem__(self, index):
12 |         return 1
13 | 
14 |     def __len__(self):
15 |         return 0
16 | 
17 |     def collater(self, samples):
18 |         return sum(samples)
19 | 


--------------------------------------------------------------------------------
/fairseq/data/numel_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
12 | class NumelDataset(BaseWrapperDataset):
13 | 
14 |     def __init__(self, dataset, reduce=False):
15 |         super().__init__(dataset)
16 |         self.reduce = reduce
17 | 
18 |     def __getitem__(self, index):
19 |         item = self.dataset[index]
20 |         if torch.is_tensor(item):
21 |             return torch.numel(item)
22 |         else:
23 |             return np.size(item)
24 | 
25 |     def __len__(self):
26 |         return len(self.dataset)
27 | 
28 |     def collater(self, samples):
29 |         if self.reduce:
30 |             return sum(samples)
31 |         else:
32 |             return torch.tensor(samples)
33 | 


--------------------------------------------------------------------------------
/fairseq/data/offset_tokens_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class OffsetTokensDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, offset):
12 |         super().__init__(dataset)
13 |         self.offset = offset
14 | 
15 |     def __getitem__(self, idx):
16 |         return self.dataset[idx] + self.offset
17 | 


--------------------------------------------------------------------------------
/fairseq/data/pad_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data import data_utils
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
11 | class PadDataset(BaseWrapperDataset):
12 | 
13 |     def __init__(self, dataset, pad_idx, left_pad):
14 |         super().__init__(dataset)
15 |         self.pad_idx = pad_idx
16 |         self.left_pad = left_pad
17 | 
18 |     def collater(self, samples):
19 |         return data_utils.collate_tokens(samples, self.pad_idx, left_pad=self.left_pad)
20 | 
21 | 
22 | class LeftPadDataset(PadDataset):
23 | 
24 |     def __init__(self, dataset, pad_idx):
25 |         super().__init__(dataset, pad_idx, left_pad=True)
26 | 
27 | 
28 | class RightPadDataset(PadDataset):
29 | 
30 |     def __init__(self, dataset, pad_idx):
31 |         super().__init__(dataset, pad_idx, left_pad=False)
32 | 


--------------------------------------------------------------------------------
/fairseq/data/prepend_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
12 | class PrependDataset(BaseWrapperDataset):
13 |     def __init__(self, dataset, prepend_getter, ensure_first_token_is=None):
14 |         super().__init__(dataset)
15 |         self.prepend_getter = prepend_getter
16 |         self.ensure_first_token = ensure_first_token_is
17 | 
18 |     def __getitem__(self, idx):
19 |         item = self.dataset[idx]
20 |         is_tuple = isinstance(item, tuple)
21 |         src = item[0] if is_tuple else item
22 | 
23 |         assert self.ensure_first_token is None or src[0] == self.ensure_first_token
24 |         prepend_idx = self.prepend_getter(self.dataset, idx)
25 |         assert isinstance(prepend_idx, int)
26 |         src[0] = prepend_idx
27 |         item = tuple((src,) + item[1:]) if is_tuple else src
28 |         return item
29 | 


--------------------------------------------------------------------------------
/fairseq/data/prepend_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | 
 9 | from . import BaseWrapperDataset
10 | 
11 | 
12 | class PrependTokenDataset(BaseWrapperDataset):
13 | 
14 |     def __init__(self, dataset, token=None):
15 |         super().__init__(dataset)
16 |         self.token = token
17 |         if token is not None:
18 |             self._sizes = np.array(dataset.sizes) + 1
19 |         else:
20 |             self._sizes = dataset.sizes
21 | 
22 |     def __getitem__(self, idx):
23 |         item = self.dataset[idx]
24 |         if self.token is not None:
25 |             item = torch.cat([item.new([self.token]), item])
26 |         return item
27 | 
28 |     @property
29 |     def sizes(self):
30 |         return self._sizes
31 | 
32 |     def num_tokens(self, index):
33 |         n = self.dataset.num_tokens(index)
34 |         if self.token is not None:
35 |             n += 1
36 |         return n
37 | 
38 |     def size(self, index):
39 |         n = self.dataset.size(index)
40 |         if self.token is not None:
41 |             n += 1
42 |         return n
43 | 


--------------------------------------------------------------------------------
/fairseq/data/raw_label_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import FairseqDataset
 9 | 
10 | 
11 | class RawLabelDataset(FairseqDataset):
12 | 
13 |     def __init__(self, labels):
14 |         super().__init__()
15 |         self.labels = labels
16 | 
17 |     def __getitem__(self, index):
18 |         return self.labels[index]
19 | 
20 |     def __len__(self):
21 |         return len(self.labels)
22 | 
23 |     def collater(self, samples):
24 |         return torch.tensor(samples)
25 | 


--------------------------------------------------------------------------------
/fairseq/data/replace_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class ReplaceDataset(BaseWrapperDataset):
10 |     """Replaces tokens found in the dataset by a specified replacement token
11 | 
12 |         Args:
13 |             dataset (~torch.utils.data.Dataset): dataset to replace tokens in
14 |             replace_map(Dictionary[int,int]): map of token to replace -> replacement token
15 |             offsets (List[int]): do not replace tokens before (from left if pos, right if neg) this offset. should be
16 |             as many as the number of objects returned by the underlying dataset __getitem__ method.
17 |         """
18 | 
19 |     def __init__(self, dataset, replace_map, offsets):
20 |         super().__init__(dataset)
21 |         assert len(replace_map) > 0
22 |         self.replace_map = replace_map
23 |         self.offsets = offsets
24 | 
25 |     def __getitem__(self, index):
26 |         item = self.dataset[index]
27 |         is_tuple = isinstance(item, tuple)
28 |         srcs = item if is_tuple else [item]
29 | 
30 |         for offset, src in zip(self.offsets, srcs):
31 |             for k, v in self.replace_map.items():
32 |                 src_off = src[offset:] if offset >= 0 else src[:offset]
33 |                 src_off.masked_fill_(src_off == k, v)
34 | 
35 |         item = srcs if is_tuple else srcs[0]
36 |         return item
37 | 


--------------------------------------------------------------------------------
/fairseq/data/roll_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
11 | class RollDataset(BaseWrapperDataset):
12 | 
13 |     def __init__(self, dataset, shifts):
14 |         super().__init__(dataset)
15 |         self.shifts = shifts
16 | 
17 |     def __getitem__(self, index):
18 |         item = self.dataset[index]
19 |         return torch.roll(item, self.shifts)
20 | 


--------------------------------------------------------------------------------
/fairseq/data/sort_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import numpy as np
 7 | 
 8 | from . import BaseWrapperDataset
 9 | 
10 | 
11 | class SortDataset(BaseWrapperDataset):
12 | 
13 |     def __init__(self, dataset, sort_order):
14 |         super().__init__(dataset)
15 |         if not isinstance(sort_order, (list, tuple)):
16 |             sort_order = [sort_order]
17 |         self.sort_order = sort_order
18 | 
19 |         assert all(len(so) == len(dataset) for so in sort_order)
20 | 
21 |     def ordered_indices(self):
22 |         return np.lexsort(self.sort_order)
23 | 


--------------------------------------------------------------------------------
/fairseq/data/strip_token_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from . import BaseWrapperDataset
 7 | 
 8 | 
 9 | class StripTokenDataset(BaseWrapperDataset):
10 | 
11 |     def __init__(self, dataset, id_to_strip):
12 |         super().__init__(dataset)
13 |         self.id_to_strip = id_to_strip
14 | 
15 |     def __getitem__(self, index):
16 |         item = self.dataset[index]
17 |         while len(item) > 0 and item[-1] == self.id_to_strip:
18 |             item = item[:-1]
19 |         while len(item) > 0 and item[0] == self.id_to_strip:
20 |             item = item[1:]
21 |         return item
22 | 


--------------------------------------------------------------------------------
/fairseq/dataclass/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/dataclass/__init__.py


--------------------------------------------------------------------------------
/fairseq/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/logging/__init__.py


--------------------------------------------------------------------------------
/fairseq/model_parallel/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from . import criterions, modules, models  # noqa
7 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/criterions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
10 | # Automatically import any Python files in the criterions/ directory, skipping
11 | # names that start with an underscore.
12 | for file in os.listdir(os.path.dirname(__file__)):
13 |     if file.endswith('.py') and not file.startswith('_'):
14 |         # Keep everything before the first '.py' to get the module name.
15 |         module = file[:file.find('.py')]
16 |         importlib.import_module('fairseq.model_parallel.criterions.' + module)
17 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # ===========
 2 | # base images
 3 | # ===========
 4 | # Start from the NVIDIA NGC PyTorch image, tag 19.05-py3.
 5 | FROM nvcr.io/nvidia/pytorch:19.05-py3
 6 | 
 7 | 
 8 | # ===============
 9 | # system packages
10 | # ===============
11 | # The apt package lists are deleted in the same RUN step as the install.
12 | RUN apt-get update && apt-get install -y \
13 |     bash-completion \
14 |     emacs \
15 |     git \
16 |     graphviz \
17 |     htop \
18 |     libopenexr-dev \
19 |     rsync \
20 |     wget \
21 | && rm -rf /var/lib/apt/lists/*
22 | 
23 | 
24 | # ============
25 | # pip packages
26 | # ============
27 | # Upgrade pip and setuptools, then install requirements.txt from the build
28 | # context, reinstalling packages even if they are already present.
29 | RUN pip install --upgrade pip && \
30 |     pip install --upgrade setuptools
31 | COPY requirements.txt /tmp/
32 | RUN pip install --upgrade --ignore-installed -r /tmp/requirements.txt
33 | 
34 | 
35 | # ===========
36 | # latest apex
37 | # ===========
38 | # Uninstall apex, then build it from the NVIDIA GitHub repository with the
39 | # C++ and CUDA extensions enabled.
40 | RUN pip uninstall -y apex && \
41 | git clone https://github.com/NVIDIA/apex.git ~/apex && \
42 | cd ~/apex && \
43 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
44 | 
45 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/docker/README.md:
--------------------------------------------------------------------------------
1 | Note: for now, PySOL must be cloned into this directory before building the container.
2 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/docker/requirements.txt:
--------------------------------------------------------------------------------
 1 | boto3
 2 | google-cloud-language
 3 | inflect
 4 | nltk
 5 | numpy
 6 | pandas
 7 | requests
 8 | sentencepiece
 9 | tensorflow
10 | tqdm
11 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/fp16/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .fp16util import (
16 |     BN_convert_float,
17 |     network_to_half,
18 |     prep_param_lists,
19 |     model_grads_to_master_grads,
20 |     master_params_to_model_params,
21 |     tofp16,
22 |     to_python_float,
23 |     clip_grad_norm,
24 |     convert_module,
25 |     convert_network,
26 |     FP16Model,
27 | )
28 | 
29 | from .fp16 import *
30 | from .loss_scaler import *
31 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/model/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .distributed import *
17 | from .gpt2_modeling import gpt2_get_params_for_weight_decay_optimization
18 | from .gpt2_modeling import GPT2Model
19 | from .model import BertModel
20 | from .model import get_params_for_weight_decay_optimization
21 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/model_parallel/megatron/mpu/tests/__init__.py


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/openwebtext/make_gpt2_sizes.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import glob
 3 | import json
 4 | import os
 5 | import time
 6 | import sys
 7 | 
 8 | import numpy as np
 9 | 
10 | 
11 | if __name__ == '__main__':
12 | 
13 |     print('building the shard sizes ...')
14 | 
15 |     path = sys.argv[1]
16 |     print('> reading numpy files from {}'.format(path))
17 | 
18 |     npy_files = glob.glob(path + '/*.npy')
19 |     npy_files.sort()
20 |     print('  found {} numpy files'.format(len(npy_files)))
21 | 
22 |     size_dict = {}
23 |     counter = 0
24 |     start_time = time.time()
25 |     for filename in npy_files:
26 |         data = np.load(filename, allow_pickle=True)
27 |         size = np.hstack(data).size
28 |         np_filename = os.path.basename(filename)
29 |         size_dict[np_filename] = size
30 |         counter += 1
31 |         if counter % 10 == 0:
32 |             print('   processed {} files in {:.2f} seconds'.format(
33 |                 counter, time.time() - start_time))
34 | 
35 |     output_filename = os.path.join(path, 'sizes.txt')
36 |     with open(output_filename, 'w') as f:
37 |         json.dump(size_dict, f)
38 |     print('> wrote sizes to {}'.format(output_filename))
39 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/openwebtext/run_make_gpt2_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Launch four background make_gpt2_dataset.py workers (threads 0-3) over $DIR.
3 | echo "processing gpt2 data ..."
4 | DIR="/raid/mpatwary/redownload_v0/0-21"
5 | # Worker output goes to $DIR/logs/shard_<thread>.log; logs/ is not created here.
6 | for thread in {0..3}; do
7 |     echo " launching thread "$thread && python make_gpt2_dataset.py $DIR $thread > $DIR/logs/shard_$thread.log 2>&1 &
8 | done
9 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/openwebtext/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import sys
17 | sys.path.append('..')
18 | 
19 | from data_utils.tokenization_gpt2 import GPT2Tokenizer
20 | 
21 | 
22 | class Tokenizer:
23 | 
24 |     def __init__(self, cache_dir=None):
25 |         self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2',
26 |                                                        cache_dir=cache_dir)
27 |         self.tokenizer.max_len = int(1e12)
28 |         self.eod_token = self.tokenizer.encoder['<|endoftext|>']
29 |         assert self.eod_token < 65535, 'vocab size will not fit in uint16'
30 |         print('> GPT2 tokenizer with {} vocab size and eod token {} ...'.format(
31 |             len(self.tokenizer.encoder), self.eod_token))
32 | 
33 |     def tokenize_document(self, document):
34 |         tokens = self.tokenizer.encode(document)
35 |         tokens.append(self.eod_token)
36 |         return tokens
37 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk>=3.4
2 | numpy>=1.15.4
3 | pandas>=0.24.0
4 | sentencepiece>=0.1.8
5 | tensorflow>=1.12.0
6 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/generate_text.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Model shape must match the checkpoint: pretrain_gpt2.sh saves gpt2_345m as 24 layers / 1024 hidden / 16 heads.
 3 | CHECKPOINT_PATH=checkpoints/gpt2_345m/
 4 | MPSIZE=1
 5 | NLAYERS=24
 6 | NHIDDEN=1024
 7 | NATT=16
 8 | MAXSEQLEN=1024
 9 | 
10 | #SAMPLING ARGS
11 | TEMP=0.9
12 | # If TOPK and TOPP are both 0, sampling defaults to greedy; top-k will also override top-p
13 | TOPK=0
14 | TOPP=0
15 | 
16 | python generate_samples.py \
17 |        --model-parallel-size $MPSIZE \
18 |        --num-layers $NLAYERS \
19 |        --hidden-size $NHIDDEN \
20 |        --load $CHECKPOINT_PATH \
21 |        --num-attention-heads $NATT \
22 |        --max-position-embeddings 1024 \
23 |        --tokenizer-type GPT2BPETokenizer \
24 |        --fp16 \
25 |        --cache-dir cache \
26 |        --out-seq-length $MAXSEQLEN \
27 |        --temperature $TEMP \
28 |        --top_k $TOPK \
29 |        --genfile dbg_unconditional.json \
30 |        --num-samples 10 \
31 |        --top_p $TOPP \
32 |        --recompute
33 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/presplit_sentences_json.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 | python scripts/presplit_sentences_json.py <original loose json file> <output loose json file>
 4 | """
 5 | 
 6 | import sys
 7 | import json
 8 | 
 9 | import nltk
10 | 
11 | nltk.download('punkt')
12 | 
13 | input_file = sys.argv[1]
14 | output_file = sys.argv[2]
15 | 
16 | line_seperator = "\n"
17 | 
18 | with open(input_file, 'r') as ifile:
19 |   with open(output_file, "w") as ofile:
20 |     for doc in ifile.readlines():
21 |       parsed = json.loads(doc)
22 |       sent_list = []
23 |       for line in parsed['text'].split('\n'):
24 |           if line != '\n':
25 |               sent_list.extend(nltk.tokenize.sent_tokenize(line))
26 |       parsed['text'] = line_seperator.join(sent_list)
27 |       ofile.write(json.dumps(parsed)+'\n')
28 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_bert.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | RANK=0
 4 | WORLD_SIZE=1
 5 | 
 6 | python pretrain_bert.py \
 7 |        --num-layers 24 \
 8 |        --hidden-size 1024 \
 9 |        --num-attention-heads 16 \
10 |        --batch-size 4 \
11 |        --seq-length 512 \
12 |        --max-preds-per-seq 80 \
13 |        --max-position-embeddings 512 \
14 |        --train-iters 1000000 \
15 |        --save checkpoints/bert_345m \
16 |        --load checkpoints/bert_345m \
17 |        --resume-dataloader \
18 |        --train-data wikipedia \
19 |        --lazy-loader \
20 |        --tokenizer-type BertWordPieceTokenizer \
21 |        --tokenizer-model-type bert-large-uncased \
22 |        --presplit-sentences \
23 |        --cache-dir cache \
24 |        --split 949,50,1 \
25 |        --distributed-backend nccl \
26 |        --lr 0.0001 \
27 |        --lr-decay-style linear \
28 |        --lr-decay-iters 990000 \
29 |        --weight-decay 1e-2 \
30 |        --clip-grad 1.0 \
31 |        --warmup .01 \
32 |        --fp16 \
33 |        --fp32-layernorm \
34 |        --fp32-embedding
35 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_bert_distributed.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | GPUS_PER_NODE=8
 4 | # Change for multinode config
 5 | MASTER_ADDR=localhost
 6 | MASTER_PORT=6000
 7 | NNODES=1
 8 | NODE_RANK=0
 9 | WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
10 | 
11 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
12 | 
13 | python -m torch.distributed.launch $DISTRIBUTED_ARGS \
14 |        pretrain_bert.py \
15 |        --num-layers 24 \
16 |        --hidden-size 1024 \
17 |        --num-attention-heads 16 \
18 |        --batch-size 4 \
19 |        --seq-length 512 \
20 |        --max-preds-per-seq 80 \
21 |        --max-position-embeddings 512 \
22 |        --train-iters 1000000 \
23 |        --save checkpoints/bert_345m \
24 |        --load checkpoints/bert_345m \
25 |        --resume-dataloader \
26 |        --train-data wikipedia \
27 |        --lazy-loader \
28 |        --tokenizer-type BertWordPieceTokenizer \
29 |        --tokenizer-model-type bert-large-uncased \
30 |        --presplit-sentences \
31 |        --cache-dir cache \
32 |        --split 949,50,1 \
33 |        --distributed-backend nccl \
34 |        --lr 0.0001 \
35 |        --lr-decay-style linear \
36 |        --lr-decay-iters 990000 \
37 |        --weight-decay 1e-2 \
38 |        --clip-grad 1.0 \
39 |        --warmup .01 \
40 |        --fp16 \
41 |        --fp32-layernorm \
42 |        --fp32-embedding
43 | 
44 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_bert_model_parallel.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | GPUS_PER_NODE=8
 4 | # Change for multinode config
 5 | MASTER_ADDR=localhost
 6 | MASTER_PORT=6000
 7 | NNODES=1
 8 | NODE_RANK=0
 9 | WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
10 | 
11 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
12 | 
13 | python -m torch.distributed.launch $DISTRIBUTED_ARGS \
14 |        pretrain_bert.py \
15 |        --model-parallel-size 2 \
16 |        --num-layers 24 \
17 |        --hidden-size 1024 \
18 |        --num-attention-heads 16 \
19 |        --batch-size 4 \
20 |        --seq-length 512 \
21 |        --max-preds-per-seq 80 \
22 |        --max-position-embeddings 512 \
23 |        --train-iters 1000000 \
24 |        --save checkpoints/bert_345m_mp2 \
25 |        --load checkpoints/bert_345m_mp2 \
26 |        --resume-dataloader \
27 |        --train-data wikipedia \
28 |        --lazy-loader \
29 |        --tokenizer-type BertWordPieceTokenizer \
30 |        --tokenizer-model-type bert-large-uncased \
31 |        --presplit-sentences \
32 |        --cache-dir cache \
33 |        --split 949,50,1 \
34 |        --distributed-backend nccl \
35 |        --lr 0.0001 \
36 |        --lr-decay-style linear \
37 |        --lr-decay-iters 990000 \
38 |        --weight-decay 1e-2 \
39 |        --clip-grad 1.0 \
40 |        --warmup .01 \
41 |        --fp16 \
42 |        --fp32-layernorm \
43 |        --fp32-embedding
44 | 
45 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_bert_sentencepiece.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | RANK=0
 4 | WORLD_SIZE=1
 5 | 
 6 | python pretrain_bert.py \
 7 |        --num-layers 24 \
 8 |        --hidden-size 1024 \
 9 |        --num-attention-heads 16 \
10 |        --batch-size 4 \
11 |        --seq-length 512 \
12 |        --max-preds-per-seq 80 \
13 |        --max-position-embeddings 512 \
14 |        --train-iters 1000000 \
15 |        --save checkpoints/bert_345m \
16 |        --load checkpoints/bert_345m \
17 |        --resume-dataloader \
18 |        --train-data wikipedia \
19 |        --lazy-loader \
20 |        --tokenizer-type SentencePieceTokenizer \
21 |        --tokenizer-model-type bpe \
22 |        --tokenizer-path tokenizer.model \
23 |        --presplit-sentences \
24 |        --cache-dir cache \
25 |        --split 949,50,1 \
26 |        --distributed-backend nccl \
27 |        --lr 0.0001 \
28 |        --lr-decay-style linear \
29 |        --lr-decay-iters 990000 \
30 |        --weight-decay 1e-2 \
31 |        --clip-grad 1.0 \
32 |        --warmup .01 \
33 |        --fp16 \
34 |        --fp32-layernorm \
35 |        --fp32-embedding
36 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_bert_tfrecords_distributed.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | GPUS_PER_NODE=8
 4 | # Change for multinode config
 5 | MASTER_ADDR=localhost
 6 | MASTER_PORT=6000
 7 | NNODES=1
 8 | NODE_RANK=0
 9 | WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
10 | 
11 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
12 | 
13 | python -m torch.distributed.launch $DISTRIBUTED_ARGS \
14 |        pretrain_bert.py \
15 |        --num-layers 24 \
16 |        --hidden-size 1024 \
17 |        --num-attention-heads 16 \
18 |        --batch-size 4 \
19 |        --seq-length 512 \
20 |        --max-preds-per-seq 80 \
21 |        --max-position-embeddings 512 \
22 |        --train-iters 1000000 \
23 |        --save checkpoints/bert_345m \
24 |        --load checkpoints/bert_345m \
25 |        --resume-dataloader \
26 |        --use-tfrecords \
27 |        --train-data <TF Record 1> <TFRecord 2> \
28 |        --valid-data <TF Record 3> \
29 |        --test-data <TF Record 4> \
30 |        --tokenizer-type BertWordPieceTokenizer \
31 |        --tokenizer-model-type bert-large-uncased \
32 |        --presplit-sentences \
33 |        --cache-dir cache \
34 |        --split 949,50,1 \
35 |        --distributed-backend nccl \
36 |        --lr 0.0001 \
37 |        --lr-decay-style linear \
38 |        --lr-decay-iters 990000 \
39 |        --weight-decay 1e-2 \
40 |        --clip-grad 1.0 \
41 |        --warmup .01 \
42 |        --fp16 \
43 |        --fp32-layernorm \
44 |        --fp32-embedding
45 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_gpt2.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Runs the "345M" parameter model
 4 | 
 5 | RANK=0
 6 | WORLD_SIZE=1
 7 | 
 8 | python pretrain_gpt2.py \
 9 |        --num-layers 24 \
10 |        --hidden-size 1024 \
11 |        --num-attention-heads 16 \
12 |        --batch-size 8 \
13 |        --seq-length 1024 \
14 |        --max-position-embeddings 1024 \
15 |        --train-iters 320000 \
16 |        --save checkpoints/gpt2_345m \
17 |        --load checkpoints/gpt2_345m \
18 |        --resume-dataloader \
19 |        --train-data wikipedia \
20 |        --lazy-loader \
21 |        --tokenizer-type GPT2BPETokenizer \
22 |        --cache-dir cache \
23 |        --split 949,50,1 \
24 |        --distributed-backend nccl \
25 |        --lr 0.00015 \
26 |        --lr-decay-style cosine \
27 |        --weight-decay 1e-2 \
28 |        --clip-grad 1.0 \
29 |        --warmup .01 \
30 |        --checkpoint-activations \
31 |        --fp16
32 | 
33 | 
34 | set +x
35 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_gpt2_distributed.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Runs the "345M" parameter model
 4 | 
 5 | GPUS_PER_NODE=8
 6 | # Change for multinode config
 7 | MASTER_ADDR=localhost
 8 | MASTER_PORT=6000
 9 | NNODES=1
10 | NODE_RANK=0
11 | WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
12 | 
13 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
14 | 
15 | python -m torch.distributed.launch $DISTRIBUTED_ARGS \
16 |        pretrain_gpt2.py \
17 |        --num-layers 24 \
18 |        --hidden-size 1024 \
19 |        --num-attention-heads 16 \
20 |        --batch-size 8 \
21 |        --seq-length 1024 \
22 |        --max-position-embeddings 1024 \
23 |        --train-iters 320000 \
24 |        --save checkpoints/gpt2_345m \
25 |        --load checkpoints/gpt2_345m \
26 |        --resume-dataloader \
27 |        --train-data wikipedia \
28 |        --lazy-loader \
29 |        --tokenizer-type GPT2BPETokenizer \
30 |        --cache-dir cache \
31 |        --split 949,50,1 \
32 |        --distributed-backend nccl \
33 |        --lr 0.00015 \
34 |        --lr-decay-style cosine \
35 |        --weight-decay 1e-2 \
36 |        --clip-grad 1.0 \
37 |        --warmup .01 \
38 |        --checkpoint-activations \
39 |        --fp16
40 | 
41 | 
42 | set +x
43 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/megatron/scripts/pretrain_gpt2_model_parallel.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Runs the "345M" parameter model
 4 | 
 5 | GPUS_PER_NODE=8
 6 | # Change for multinode config
 7 | MASTER_ADDR=localhost
 8 | MASTER_PORT=6000
 9 | NNODES=1
10 | NODE_RANK=0
11 | WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
12 | 
13 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
14 | 
15 | python -m torch.distributed.launch $DISTRIBUTED_ARGS \
16 |        pretrain_gpt2.py \
17 |        --model-parallel-size 2 \
18 |        --num-layers 24 \
19 |        --hidden-size 1024 \
20 |        --num-attention-heads 16 \
21 |        --batch-size 8 \
22 |        --seq-length 1024 \
23 |        --max-position-embeddings 1024 \
24 |        --train-iters 320000 \
25 |        --save checkpoints/gpt2_345m_mp2 \
26 |        --load checkpoints/gpt2_345m_mp2 \
27 |        --resume-dataloader \
28 |        --train-data wikipedia \
29 |        --lazy-loader \
30 |        --tokenizer-type GPT2BPETokenizer \
31 |        --cache-dir cache \
32 |        --split 949,50,1 \
33 |        --distributed-backend nccl \
34 |        --lr 0.00015 \
35 |        --lr-decay-style cosine \
36 |        --weight-decay 1e-2 \
37 |        --clip-grad 1.0 \
38 |        --warmup .01 \
39 |        --checkpoint-activations \
40 |        --fp16
41 | 
42 | 
43 | set +x
44 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
10 | # automatically import any Python files in the models/ directory
11 | models_dir = os.path.dirname(__file__)
12 | for file in os.listdir(models_dir):
13 |     path = os.path.join(models_dir, file)
14 |     if not file.startswith('_') and not file.startswith('.') and (file.endswith('.py') or os.path.isdir(path)):
15 |         model_name = file[:file.find('.py')] if file.endswith('.py') else file
16 |         module = importlib.import_module('fairseq.model_parallel.models.' + model_name)
17 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/models/pipeline_parallel_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .model import *  # noqa
7 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/models/roberta/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .model import *  # noqa
7 | 


--------------------------------------------------------------------------------
/fairseq/model_parallel/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .multihead_attention import ModelParallelMultiheadAttention
 7 | from .transformer_layer import ModelParallelTransformerEncoderLayer, ModelParallelTransformerDecoderLayer
 8 | from .transformer_sentence_encoder_layer import ModelParallelTransformerSentenceEncoderLayer
 9 | from .transformer_sentence_encoder import ModelParallelTransformerSentenceEncoder
10 | 
11 | __all__ = [
12 |     'ModelParallelMultiheadAttention',
13 |     'ModelParallelTransformerEncoderLayer',
14 |     'ModelParallelTransformerDecoderLayer',
15 |     'ModelParallelTransformerSentenceEncoder',
16 |     'ModelParallelTransformerSentenceEncoderLayer',
17 | ]
18 | 


--------------------------------------------------------------------------------
/fairseq/models/bart/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .hub_interface import *  # noqa
7 | from .model import *  # noqa
8 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | 
10 | # automatically import any Python files in the models/huggingface/ directory
11 | models_dir = os.path.dirname(__file__)
12 | for file in os.listdir(models_dir):
13 |     path = os.path.join(models_dir, file)
14 |     if (
15 |         not file.startswith('_')
16 |         and not file.startswith('.')
17 |         and (file.endswith('.py') or os.path.isdir(path))
18 |     ):
19 |         model_name = file[:file.find('.py')] if file.endswith('.py') else file
20 |         module = importlib.import_module('fairseq.models.huggingface.' + model_name)
21 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.circleci/deploy.sh:
--------------------------------------------------------------------------------
 1 | cd docs || exit 1  # never run git/make from the wrong directory
 2 | # deploy_doc <commit> [<folder>]: build the docs at <commit>; push to $doc:$dir/<folder>, or to $doc:$dir when no folder is given.
 3 | function deploy_doc(){
 4 | 	echo "Creating doc at commit $1 and pushing to folder $2"
 5 | 	git checkout "$1" || return 1  # a failed checkout would publish the wrong revision under this version
 6 | 	if [ -n "$2" ]
 7 | 	then
 8 | 		if [ -d "$dir/$2" ]; then
 9 | 			echo "Directory $2 already exists"
10 | 		else
11 | 			echo "Pushing version $2"
12 | 			make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html "$doc:$dir/$2"
13 | 		fi
14 | 	else
15 | 		echo "Pushing master"
16 | 		make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* "$doc:$dir"
17 | 	fi
18 | }
19 | 
20 | deploy_doc "master"
21 | deploy_doc "b33a385" v1.0.0
22 | deploy_doc "fe02e45" v1.1.0
23 | deploy_doc "89fd345" v1.2.0
24 | deploy_doc "fc9faa8" v2.0.0
25 | deploy_doc "3ddce1d" v2.1.1
26 | deploy_doc "3616209" v2.2.0
27 | deploy_doc "d0f8b9a" v2.3.0
28 | deploy_doc "6664ea9" v2.4.0
29 | deploy_doc "fb560dc" v2.5.0
30 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | source=transformers
 3 | omit =
 4 |     # skip conversion scripts from testing for now
 5 |     */convert_*
 6 |     */__main__.py
 7 | [report]
 8 | exclude_lines =
 9 |     pragma: no cover
10 |     raise
11 |     except
12 |     register_parameter


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/ISSUE_TEMPLATE/---new-benchmark.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F5A5 New benchmark"
 3 | about: Benchmark a part of this library and share your results
 4 | title: "[Benchmark]"
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | # 🖥 Benchmarking `transformers`
11 | 
12 | ## Benchmark
13 | 
14 | Which part of `transformers` did you benchmark?
15 | 
16 | ## Set-up
17 | 
18 | What did you run your benchmarks on? Please include details, such as: CPU, GPU? If using multiple GPUs, which parallelization did you use?
19 | 
20 | ## Results
21 | 
22 | Put your results here!
23 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/ISSUE_TEMPLATE/--new-model-addition.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F31F New model addition"
 3 | about: Submit a proposal/request to implement a new Transformer-based model
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | # 🌟 New model addition
11 | 
12 | ## Model description
13 | 
14 | <!-- Important information -->
15 | 
16 | ## Open source status
17 | 
18 | * [ ] the model implementation is available: (give details)
19 | * [ ] the model weights are available: (give details)
20 | * [ ] who are the authors: (mention them, if possible by @gh-username)
21 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F680 Feature request"
 3 | about: Submit a proposal/request for a new transformers feature
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | # 🚀 Feature request
11 | 
12 | <!-- A clear and concise description of the feature proposal.
13 |      Please provide a link to the paper and code in case they exist. -->
14 | 
15 | ## Motivation
16 | 
17 | <!-- Please outline the motivation for the proposal. Is your feature request
18 |      related to a problem? e.g., I'm always frustrated when [...]. If this is related
19 |      to another GitHub issue, please link here too. -->
20 | 
21 | ## Your contribution
22 | 
23 | <!-- Is there any way that you could help, e.g. by submitting a PR?
24 |      Make sure to read the CONTRIBUTING.MD readme:
25 |      https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md -->
26 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/ISSUE_TEMPLATE/question-help.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "❓ Questions & Help"
 3 | about: Post your general questions on Stack Overflow tagged huggingface-transformers
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | # ❓ Questions & Help
11 | 
12 | <!-- The GitHub issue tracker is primarily intended for bugs, feature requests,
13 |      new models and benchmarks, and migration questions. For all other questions,
14 |      we direct you to Stack Overflow (SO) where a whole community of PyTorch and
15 |      TensorFlow enthusiasts can help you out. Make sure to tag your question with the
16 |      right deep learning framework as well as the huggingface-transformers tag: 
17 |      https://stackoverflow.com/questions/tagged/huggingface-transformers 
18 |      
19 |      If your question wasn't answered after a period of time on Stack Overflow, you
20 |      can always open a question on GitHub. You should then link to the SO question 
21 |      that you posted.
22 |      -->
23 | 
24 | ## Details
25 | <!-- Description of your issue -->
26 | 
27 | <!-- You should first ask your question on SO, and only if
28 |      you didn't get an answer ask it here on GitHub. -->
29 | **A link to original question on Stack Overflow**: 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/stale.yml:
--------------------------------------------------------------------------------
 1 | # Number of days of inactivity before an issue becomes stale
 2 | daysUntilStale: 60
 3 | # Number of days of inactivity before a stale issue is closed
 4 | daysUntilClose: 7
 5 | # Issues with these labels will never be considered stale
 6 | exemptLabels:
 7 |   - pinned
 8 |   - security
 9 | # Label to use when marking an issue as stale
10 | staleLabel: wontfix
11 | # Comment to post when marking an issue as stale. Set to `false` to disable
12 | markComment: >
13 |   This issue has been automatically marked as stale because it has not had
14 |   recent activity. It will be closed if no further activity occurs. Thank you
15 |   for your contributions.
16 | # Comment to post when closing a stale issue. Set to `false` to disable
17 | closeComment: false


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/workflows/github-push.yml:
--------------------------------------------------------------------------------
 1 | name: GitHub-hosted runner
 2 | 
 3 | on: push
 4 | 
 5 | jobs:
 6 |   check_code_quality:
 7 |     runs-on: ubuntu-18.04
 8 |     steps:
 9 |     - uses: actions/checkout@v2
10 |     - name: Set up Python
11 |       uses: actions/setup-python@v1
12 |       with:
13 |         python-version: 3.7
14 |     - name: Install dependencies
15 |       run: |
16 |         pip install .[tf,torch,quality]
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/.github/workflows/self-push.yml:
--------------------------------------------------------------------------------
 1 | name: Self-hosted runner (push)
 2 | 
 3 | on: 
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | 
10 | jobs:
11 |   run_tests_torch_and_tf_gpu:
12 |     runs-on: self-hosted
13 |     steps:
14 |     - uses: actions/checkout@v2
15 |     - name: Python version
16 |       run: |
17 |         which python
18 |         python --version
19 |         pip --version
20 |     - name: Current dir
21 |       run: pwd
22 |     - run: nvidia-smi
23 |     - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
24 |       run: |
25 |         python -m venv .env
26 |         source .env/bin/activate
27 |         which python
28 |         python --version
29 |         pip --version
30 |     - name: Install dependencies
31 |       run: |
32 |         source .env/bin/activate
33 |         pip install .[sklearn,tf,torch,testing]
34 | 
35 |     - name: Are GPUs recognized by our DL frameworks
36 |       run: |
37 |         source .env/bin/activate
38 |         python -c "import torch; print(torch.cuda.is_available())"
39 |         python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
40 | 
41 |     - name: Run all non-slow tests on GPU
42 |       env:
43 |         TF_FORCE_GPU_ALLOW_GROWTH: "true"
44 |         # TF_GPU_MEMORY_LIMIT: 4096
45 |         OMP_NUM_THREADS: 1
46 |         USE_CUDA: yes
47 |       run: |
48 |         source .env/bin/activate
49 |         python -m pytest -n 2 --dist=loadfile -s -v ./tests/
50 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: quality style test test-examples
 2 | 
 3 | # Check that source code meets quality standards
 4 | 
 5 | quality:
 6 | 	black --check --line-length 119 --target-version py35 examples templates tests src utils
 7 | 	isort --check-only --recursive examples templates tests src utils
 8 | 	flake8 examples templates tests src utils
 9 | 
10 | # Format source code automatically
11 | 
12 | style:
13 | 	black --line-length 119 --target-version py35 examples templates tests src utils
14 | 	isort --recursive examples templates tests src utils
15 | 
16 | # Run tests for the library
17 | 
18 | test:
19 | 	python -m pytest -n auto --dist=loadfile -s -v ./tests/
20 | 
21 | # Run tests for examples
22 | 
23 | test-examples:
24 | 	python -m pytest -n auto --dist=loadfile -s -v ./examples/
25 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/deploy_multi_version_doc.sh:
--------------------------------------------------------------------------------
 1 | cd docs || exit 1  # never run git/make from the wrong directory
 2 | # deploy_doc <commit> [<folder>]: build the docs at <commit>; push to $doc:$dir/<folder>, or to $doc:$dir when no folder is given.
 3 | function deploy_doc(){
 4 | 	echo "Creating doc at commit $1 and pushing to folder $2"
 5 | 	git checkout "$1" || return 1  # a failed checkout would publish the wrong revision under this version
 6 | 	if [ -n "$2" ]
 7 | 	then
 8 | 		echo "Pushing version $2"
 9 | 		make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html "$doc:$dir/$2"
10 | 	else
11 | 		echo "Pushing master"
12 | 		make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* "$doc:$dir"
13 | 	fi
14 | }
15 | 
16 | deploy_doc "master"
17 | deploy_doc "b33a385" v1.0.0
18 | deploy_doc "fe02e45" v1.1.0
19 | deploy_doc "89fd345" v1.2.0
20 | deploy_doc "fc9faa8" v2.0.0
21 | deploy_doc "3ddce1d" v2.1.1
22 | deploy_doc "f2f3294" v2.2.0
23 | deploy_doc "d0f8b9a" v2.3.0
24 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | # PyTorch base image with NVIDIA apex (CUDA/C++ extensions) and transformers installed.
2 | FROM pytorch/pytorch:latest
3 | 
4 | RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext
5 | 
6 | # --no-cache-dir keeps pip's download cache out of the image layer.
7 | RUN pip install --no-cache-dir transformers
8 | 
9 | WORKDIR /workspace


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | # (every target other than "help" is forwarded to sphinx-build by the catch-all rule at the bottom)
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = _build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Light.ttf


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Medium.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Medium.otf


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Regular.otf


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Thin.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/_static/css/Calibre-Thin.otf


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/_static/css/code-snippets.css:
--------------------------------------------------------------------------------
 1 | /* Colours for highlighted code snippets; the class names look like Pygments short token names (TODO confirm against the docs theme). */
 2 | .highlight .c1, .highlight .sd{
 3 |     color: #999;
 4 | }
 5 | 
 6 | .highlight .nn, .highlight .k, .highlight .s1, .highlight .nb, .highlight .bp, .highlight .kc {
 7 |     color: #FB8D68;
 8 | }
 9 | 
10 | .highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow {
11 |     color: #6670FF;
12 | }


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/bertology.rst:
--------------------------------------------------------------------------------
 1 | BERTology
 2 | ---------
 3 | 
 4 | There is a growing field of study concerned with investigating the inner workings of large-scale transformers like BERT (that some call "BERTology"). Some good examples of this field are:
 5 | 
 6 | 
 7 | * BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: https://arxiv.org/abs/1905.05950
 8 | * Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650
 9 | * What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D. Manning: https://arxiv.org/abs/1906.04341
10 | 
11 | In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to help people access the inner representations, mainly adapted from the great work of Paul Michel (https://arxiv.org/abs/1905.10650):
12 | 
13 | 
14 | * accessing all the hidden-states of BERT/GPT/GPT-2,
15 | * accessing all the attention weights for each head of BERT/GPT/GPT-2,
16 | * retrieving head output values and gradients to be able to compute head importance scores and prune heads, as explained in https://arxiv.org/abs/1905.10650.
17 | 
18 | To help you understand and use these features, we have added a specific example script: `run_bertology.py <https://github.com/huggingface/transformers/blob/master/examples/run_bertology.py>`_, which extracts information from and prunes a model pre-trained on GLUE.
19 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/examples.md:
--------------------------------------------------------------------------------
1 | ../../examples/README.md


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/favicon.ico


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/transformers_logo_name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/transformers_logo_name.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_constant_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_constant_schedule.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_hard_restarts_schedule.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_schedule.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_cosine_warm_restarts_schedule.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_linear_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/docs/source/imgs/warmup_linear_schedule.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/main_classes/configuration.rst:
--------------------------------------------------------------------------------
 1 | Configuration
 2 | ----------------------------------------------------
 3 | 
 4 | The base class ``PretrainedConfig`` implements the common methods for loading/saving a configuration either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository).
 5 | 
 6 | ``PretrainedConfig``
 7 | ~~~~~~~~~~~~~~~~~~~~~
 8 | 
 9 | .. autoclass:: transformers.PretrainedConfig
10 |     :members:
11 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/main_classes/model.rst:
--------------------------------------------------------------------------------
 1 | Models
 2 | ----------------------------------------------------
 3 | 
 4 | The base class ``PreTrainedModel`` implements the common methods for loading/saving a model either from a local file or directory, or from a pretrained model configuration provided by the library (downloaded from HuggingFace's AWS S3 repository).
 5 | 
 6 | ``PreTrainedModel`` also implements a few methods which are common among all the models to:
 7 | 
 8 | - resize the input token embeddings when new tokens are added to the vocabulary
 9 | - prune the attention heads of the model.
10 | 
11 | ``PreTrainedModel``
12 | ~~~~~~~~~~~~~~~~~~~~~
13 | 
14 | .. autoclass:: transformers.PreTrainedModel
15 |     :members:
16 | 
17 | ``TFPreTrainedModel``
18 | ~~~~~~~~~~~~~~~~~~~~~
19 | 
20 | .. autoclass:: transformers.TFPreTrainedModel
21 |     :members:
22 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/docs/source/main_classes/tokenizer.rst:
--------------------------------------------------------------------------------
 1 | Tokenizer
 2 | ----------------------------------------------------
 3 | 
 4 | The base class ``PreTrainedTokenizer`` implements the common methods for loading/saving a tokenizer either from a local file or directory, or from a pretrained tokenizer provided by the library (downloaded from HuggingFace's AWS S3 repository).
 5 | 
 6 | ``PreTrainedTokenizer`` is the main entry point into tokenizers as it also implements the main methods for using all the tokenizers:
 7 | 
 8 | - tokenizing, converting tokens to ids and back and encoding/decoding,
 9 | - adding new tokens to the vocabulary in a way that is independent of the underlying structure (BPE, SentencePiece...),
10 | - managing special tokens (adding them, assigning them to roles, making sure they are not split during tokenization)
11 | 
12 | ``PreTrainedTokenizer``
13 | ~~~~~~~~~~~~~~~~~~~~~~~~
14 | 
15 | .. autoclass:: transformers.PreTrainedTokenizer
16 |     :members:
17 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/contrib/README.md:
--------------------------------------------------------------------------------
1 | # Community contributed examples
2 | 
3 | This folder contains examples which are not actively maintained (mostly contributed by the community).
4 | 
5 | Using these examples together with a recent version of the library usually requires making small (sometimes big) adaptations to get the scripts working.
6 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | 
3 | gitpython==3.0.2
4 | tensorboard>=1.14.0
5 | tensorboardX==1.8
6 | psutil==5.6.3
7 | scipy==1.3.1
8 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/training_configs/distilbert-base-cased.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"activation": "gelu",
 3 | 	"attention_dropout": 0.1,
 4 | 	"dim": 768,
 5 | 	"dropout": 0.1,
 6 | 	"hidden_dim": 3072,
 7 | 	"initializer_range": 0.02,
 8 | 	"max_position_embeddings": 512,
 9 | 	"n_heads": 12,
10 | 	"n_layers": 6,
11 | 	"sinusoidal_pos_embds": true,
12 | 	"tie_weights_": true,
13 | 	"vocab_size": 28996
14 |   }
15 |   


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/training_configs/distilbert-base-multilingual-cased.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"activation": "gelu",
 3 | 	"attention_dropout": 0.1,
 4 | 	"dim": 768,
 5 | 	"dropout": 0.1,
 6 | 	"hidden_dim": 3072,
 7 | 	"initializer_range": 0.02,
 8 | 	"max_position_embeddings": 512,
 9 | 	"n_heads": 12,
10 | 	"n_layers": 6,
11 | 	"sinusoidal_pos_embds": true,
12 | 	"tie_weights_": true,
13 | 	"vocab_size": 119547
14 |   }
15 |   


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/training_configs/distilbert-base-uncased.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"activation": "gelu",
 3 | 	"attention_dropout": 0.1,
 4 | 	"dim": 768,
 5 | 	"dropout": 0.1,
 6 | 	"hidden_dim": 3072,
 7 | 	"initializer_range": 0.02,
 8 | 	"max_position_embeddings": 512,
 9 | 	"n_heads": 12,
10 | 	"n_layers": 6,
11 | 	"sinusoidal_pos_embds": true,
12 | 	"tie_weights_": true,
13 | 	"vocab_size": 30522
14 |   }
15 |   


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/training_configs/distilgpt2.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"initializer_range": 0.02,
 3 | 	"layer_norm_epsilon": 0.00001,
 4 | 	"n_ctx": 1024,
 5 | 	"n_embd": 768,
 6 | 	"n_head": 12,
 7 | 	"n_layer": 6,
 8 | 	"n_positions": 1024,
 9 | 	"vocab_size": 50257
10 | }


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/distillation/training_configs/distilroberta-base.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "vocab_size": 50265,
 3 |     "hidden_size": 768,
 4 |     "num_hidden_layers": 6,
 5 |     "num_attention_heads": 12,
 6 |     "intermediate_size": 3072,
 7 |     "hidden_act": "gelu",
 8 |     "hidden_dropout_prob": 0.1,
 9 |     "attention_probs_dropout_prob": 0.1,
10 |     "max_position_embeddings": 514,
11 |     "type_vocab_size": 1,
12 |     "initializer_range": 0.02,
13 |     "layer_norm_eps": 0.00001
14 | }


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/ner/run.sh:
--------------------------------------------------------------------------------
 1 | # Fine-tune multilingual BERT for NER on the GermEval 2014 data: download the
 2 | # three splits, preprocess them, derive the label set, then train, evaluate
 3 | # and predict with run_ner.py.
 4 | 
 5 | # Stop at the first failing command so training never starts on missing or
 6 | # half-prepared data files.
 7 | set -e
 8 | 
 9 | # For each split: drop '#' comment lines, keep columns 2-3, turn tabs into spaces.
10 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \
11 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp
12 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \
13 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp
14 | curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \
15 | | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp
16 | # NOTE(review): preprocess.py is fetched from the master branch of a third-party
17 | # repository and then executed; consider pinning it to a specific commit.
18 | wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py"
19 | export MAX_LENGTH=128
20 | export BERT_MODEL=bert-base-multilingual-cased
21 | python3 preprocess.py train.txt.tmp "$BERT_MODEL" "$MAX_LENGTH" > train.txt
22 | python3 preprocess.py dev.txt.tmp "$BERT_MODEL" "$MAX_LENGTH" > dev.txt
23 | python3 preprocess.py test.txt.tmp "$BERT_MODEL" "$MAX_LENGTH" > test.txt
24 | # The label set is every distinct, non-empty value of the second column.
25 | cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$" | sort -u > labels.txt
26 | export OUTPUT_DIR=germeval-model
27 | export BATCH_SIZE=32
28 | export NUM_EPOCHS=3
29 | export SAVE_STEPS=750
30 | export SEED=1
31 | 
32 | python3 run_ner.py --data_dir ./ \
33 | --model_type bert \
34 | --labels ./labels.txt \
35 | --model_name_or_path "$BERT_MODEL" \
36 | --output_dir "$OUTPUT_DIR" \
37 | --max_seq_length "$MAX_LENGTH" \
38 | --num_train_epochs "$NUM_EPOCHS" \
39 | --per_gpu_train_batch_size "$BATCH_SIZE" \
40 | --save_steps "$SAVE_STEPS" \
41 | --seed "$SEED" \
42 | --do_train \
43 | --do_eval \
44 | --do_predict
45 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/pplm/imgs/headfigure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/examples/pplm/imgs/headfigure.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/pplm/imgs/wooly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/examples/pplm/imgs/wooly.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/pplm/pplm_classification_head.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class ClassificationHead(torch.nn.Module):
 5 |     """Classification Head for  transformer encoders"""
 6 | 
 7 |     def __init__(self, class_size, embed_size):
 8 |         super().__init__()
 9 |         self.class_size = class_size
10 |         self.embed_size = embed_size
11 |         # self.mlp1 = torch.nn.Linear(embed_size, embed_size)
12 |         # self.mlp2 = (torch.nn.Linear(embed_size, class_size))
13 |         self.mlp = torch.nn.Linear(embed_size, class_size)
14 | 
15 |     def forward(self, hidden_state):
16 |         # hidden_state = F.relu(self.mlp1(hidden_state))
17 |         # hidden_state = self.mlp2(hidden_state)
18 |         logits = self.mlp(hidden_state)
19 |         return logits
20 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorboardX
2 | tensorboard
3 | scikit-learn
4 | seqeval
5 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/summarization/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | 
3 | # For ROUGE
4 | nltk
5 | py-rouge
6 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/tests_samples/.gitignore:
--------------------------------------------------------------------------------
1 | *.*
2 | cache*
3 | temp*
4 | !*.tsv
5 | !*.json
6 | !.gitignore


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/tests_samples/MRPC/dev.tsv:
--------------------------------------------------------------------------------
1 | ﻿Quality	#1 ID	#2 ID	#1 String	#2 String
2 | 1	1355540	1355592	He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .	" The foodservice pie business does not fit our long-term growth strategy .
3 | 0	2029631	2029565	Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .	His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
4 | 0	487993	487952	The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .	The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
5 | 1	1989515	1989458	The AFL-CIO is waiting until October to decide if it will endorse a candidate .	The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
6 | 0	1783137	1782659	No dates have been set for the civil or the criminal trial .	No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
7 | 1	3039165	3039036	Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .	It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
8 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/examples/tests_samples/MRPC/train.tsv:
--------------------------------------------------------------------------------
1 | ﻿Quality	#1 ID	#2 ID	#1 String	#2 String
2 | 1	1355540	1355592	He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .	" The foodservice pie business does not fit our long-term growth strategy .
3 | 0	2029631	2029565	Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .	His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
4 | 0	487993	487952	The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .	The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
5 | 1	1989515	1989458	The AFL-CIO is waiting until October to decide if it will endorse a candidate .	The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
6 | 0	1783137	1782659	No dates have been set for the civil or the criminal trial .	No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
7 | 1	3039165	3039036	Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed .	It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
8 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/binwang/xlnet-base-cased/README.md:
--------------------------------------------------------------------------------
1 | This model is a pre-trained **XLNet** with 12 layers.
2 | 
3 | It accompanies the paper: SBERT-WK: A Sentence Embedding Method By Dissecting BERT-based Word Models
4 | 
5 | Project Page: [SBERT-WK](https://github.com/BinWang28/SBERT-WK-Sentence-Embedding)
6 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/canwenxu/BERT-of-Theseus-MNLI/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | thumbnail: https://raw.githubusercontent.com/JetRunner/BERT-of-Theseus/master/bert-of-theseus.png
 3 | ---
 4 | 
 5 | # BERT-of-Theseus
 6 | See our paper ["BERT-of-Theseus: Compressing BERT by Progressive Module Replacing"](http://arxiv.org/abs/2002.02925).
 7 | 
 8 | BERT-of-Theseus is a new compressed BERT by progressively replacing the components of the original BERT.
 9 | 
10 | ![BERT of Theseus](https://github.com/JetRunner/BERT-of-Theseus/blob/master/bert-of-theseus.png?raw=true)
11 | 
12 | ## Load Pretrained Model on MNLI
13 | 
14 | We provide a 6-layer pretrained model on MNLI as a general-purpose model, which can transfer to other sentence classification tasks, outperforming DistilBERT (with the same 6-layer structure) on six tasks of GLUE (dev set).
15 | 
16 | | Method          | MNLI | MRPC | QNLI | QQP  | RTE  | SST-2 | STS-B |
17 | |-----------------|------|------|------|------|------|-------|-------|
18 | | BERT-base       | 83.5 | 89.5 | 91.2 | 89.8 | 71.1 | 91.5  | 88.9  |
19 | | DistilBERT      | 79.0 | 87.5 | 85.3 | 84.9 | 59.9 | 90.7  | 81.2  |
20 | | BERT-of-Theseus | 82.1 | 87.5 | 88.8 | 88.8 | 70.1 | 91.8  | 87.8  |
21 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/fmikaelian/camembert-base-fquad/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: french
 3 | ---
 4 | 
 5 | # camembert-base-fquad
 6 | 
 7 | ## Description
 8 | 
 9 | A baseline model for question-answering in French ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [FQuAD](https://fquad.illuin.tech/))
10 | 
11 | ## Training hyperparameters
12 | 
13 | ```shell
14 | python3 ./examples/run_squad.py \
15 | --model_type camembert \
16 | --model_name_or_path camembert-base \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file train.json \
21 | --predict_file valid.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3 \
29 | --save_steps 10000
30 | ``` 
31 | 
32 | ## Evaluation results
33 | 
34 | ```shell
35 | {"f1": 77.24515316052342, "exact_match": 52.82308657465496}
36 | ```
37 | 
38 | ## Usage
39 | 
40 | ```python
41 | from transformers import pipeline
42 | 
43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-fquad', tokenizer='fmikaelian/camembert-base-fquad')
44 | 
45 | nlp({
46 |     'question': "Qui est Claude Monet?",
47 |     'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
48 | })
49 | ```


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/fmikaelian/camembert-base-squad/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: french
 3 | ---
 4 | 
 5 | # camembert-base-squad
 6 | 
 7 | ## Description
 8 | 
 9 | A baseline model for question-answering in French ([CamemBERT](https://camembert-model.fr/) model fine-tuned on [French-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD))
10 | 
11 | ## Training hyperparameters
12 | 
13 | ```shell
14 | python3 ./examples/run_squad.py \
15 | --model_type camembert \
16 | --model_name_or_path camembert-base \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \
21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output3 \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3 \
29 | --save_steps 10000
30 | ``` 
31 | 
32 | ## Evaluation results
33 | 
34 | ```shell
35 | {"f1": 79.8570684959745, "exact_match": 59.21327108373895}
36 | ```
37 | 
38 | ## Usage
39 | 
40 | ```python
41 | from transformers import pipeline
42 | 
43 | nlp = pipeline('question-answering', model='fmikaelian/camembert-base-squad', tokenizer='fmikaelian/camembert-base-squad')
44 | 
45 | nlp({
46 |     'question': "Qui est Claude Monet?",
47 |     'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
48 | })
49 | ```


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/fmikaelian/flaubert-base-uncased-squad/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: french
 3 | ---
 4 | 
 5 | # flaubert-base-uncased-squad
 6 | 
 7 | ## Description
 8 | 
 9 | A baseline model for question-answering in French ([FlauBERT](https://github.com/getalp/Flaubert) model fine-tuned on [French-translated SQuAD 1.1 dataset](https://github.com/Alikabbadj/French-SQuAD))
10 | 
11 | ## Training hyperparameters
12 | 
13 | ```shell
14 | python3 ./examples/run_squad.py \
15 | --model_type flaubert \
16 | --model_name_or_path flaubert-base-uncased \
17 | --do_train \
18 | --do_eval \
19 | --do_lower_case \
20 | --train_file SQuAD-v1.1-train_fr_ss999_awstart2_net.json \
21 | --predict_file SQuAD-v1.1-dev_fr_ss999_awstart2_net.json \
22 | --learning_rate 3e-5 \
23 | --num_train_epochs 2 \
24 | --max_seq_length 384 \
25 | --doc_stride 128 \
26 | --output_dir output \
27 | --per_gpu_eval_batch_size=3 \
28 | --per_gpu_train_batch_size=3
29 | ``` 
30 | 
31 | ## Evaluation results
32 | 
33 | ```shell
34 | {"f1": 68.66174806561969, "exact_match": 49.299692063176714}
35 | ```
36 | 
37 | ## Usage
38 | 
39 | ```python
40 | from transformers import pipeline
41 | 
42 | nlp = pipeline('question-answering', model='fmikaelian/flaubert-base-uncased-squad', tokenizer='fmikaelian/flaubert-base-uncased-squad')
43 | 
44 | nlp({
45 |     'question': "Qui est Claude Monet?",
46 |     'context': "Claude Monet, né le 14 novembre 1840 à Paris et mort le 5 décembre 1926 à Giverny, est un peintre français et l’un des fondateurs de l'impressionnisme."
47 | })
48 | ```


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/jplu/tf-camembert-base/README.md:
--------------------------------------------------------------------------------
 1 | # TensorFlow CamemBERT
 2 | 
 3 | In this repository you will find different versions of the CamemBERT model for TensorFlow.
 4 | 
 5 | ## CamemBERT
 6 | 
 7 | [CamemBERT](https://camembert-model.fr/) is a state-of-the-art language model for French based on the RoBERTa architecture pretrained on the French subcorpus of the newly available multilingual corpus OSCAR.
 8 | 
 9 | ## Model Weights
10 | 
11 | | Model                            | Downloads
12 | | -------------------------------- | ---------------------------------------------------------------------------------------------------------------
13 | | `jplu/tf-camembert-base`   | [`config.json`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/config.json) • [`tf_model.h5`](https://s3.amazonaws.com/models.huggingface.co/bert/jplu/tf-camembert-base/tf_model.h5)
14 | 
15 | ## Usage
16 | 
17 | With Transformers >= 2.4, the TensorFlow models of CamemBERT can be loaded like this:
18 | 
19 | ```python
20 | from transformers import TFCamembertModel
21 | 
22 | model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base")
23 | ```
24 | 
25 | ## Huggingface model hub
26 | 
27 | All models are available on the [Huggingface model hub](https://huggingface.co/jplu).
28 | 
29 | ## Acknowledgments
30 | 
31 | Thanks to all the Huggingface team for the support and their amazing library!
32 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/julien-c/EsperBERTo-small-pos/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: esperanto
 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png
 4 | ---
 5 | 
 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto
 7 | 
 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥
 9 | 
10 | ## Training Details
11 | 
12 | - current checkpoint: 566000
13 | - machine name: `galinette`
14 | 
15 | 
16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png)
17 | 
18 | ## Example pipeline
19 | 
20 | ```python
21 | from transformers import TokenClassificationPipeline, pipeline
22 | 
23 | 
24 | MODEL_PATH = "./models/EsperBERTo-small-pos/"
25 | 
26 | nlp = pipeline(
27 |     "ner",
28 |     model=MODEL_PATH,
29 |     tokenizer=MODEL_PATH,
30 | )
31 | # or instantiate a TokenClassificationPipeline directly.
32 | 
33 | nlp("Mi estas viro kej estas tago varma.")
34 | 
35 | # {'entity': 'PRON', 'score': 0.9979867339134216, 'word': ' Mi'}
36 | # {'entity': 'VERB', 'score': 0.9683094620704651, 'word': ' estas'}
37 | # {'entity': 'VERB', 'score': 0.9797462821006775, 'word': ' estas'}
38 | # {'entity': 'NOUN', 'score': 0.8509314060211182, 'word': ' tago'}
39 | # {'entity': 'ADJ', 'score': 0.9996201395988464, 'word': ' varma'}
40 | ```


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/julien-c/EsperBERTo-small/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | language: esperanto
 3 | thumbnail: https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png
 4 | ---
 5 | 
 6 | # EsperBERTo: RoBERTa-like Language model trained on Esperanto
 7 | 
 8 | **Companion model to blog post https://huggingface.co/blog/how-to-train** 🔥
 9 | 
10 | ## Training Details
11 | 
12 | - current checkpoint: 566000
13 | - machine name: `galinette`
14 | 
15 | 
16 | ![](https://huggingface.co/blog/assets/EsperBERTo-thumbnail-v2.png)
17 | 
18 | ## Example pipeline
19 | 
20 | ```python
21 | from transformers import pipeline
22 | 
23 | fill_mask = pipeline(
24 |     "fill-mask",
25 |     model="julien-c/EsperBERTo-small",
26 |     tokenizer="julien-c/EsperBERTo-small"
27 | )
28 | 
29 | fill_mask("Jen la komenco de bela <mask>.")
30 | 
31 | # This is the beginning of a beautiful <mask>.
32 | # =>
33 | 
34 | # {
35 | #     'score':0.06502299010753632
36 | #     'sequence':'<s> Jen la komenco de bela vivo.</s>'
37 | #     'token':1099
38 | # }
39 | # {
40 | #     'score':0.0421181358397007
41 | #     'sequence':'<s> Jen la komenco de bela vespero.</s>'
42 | #     'token':5100
43 | # }
44 | # {
45 | #     'score':0.024884626269340515
46 | #     'sequence':'<s> Jen la komenco de bela laboro.</s>'
47 | #     'token':1570
48 | # }
49 | # {
50 | #     'score':0.02324388362467289
51 | #     'sequence':'<s> Jen la komenco de bela tago.</s>'
52 | #     'token':1688
53 | # }
54 | # {
55 | #     'score':0.020378097891807556
56 | #     'sequence':'<s> Jen la komenco de bela festo.</s>'
57 | #     'token':4580
58 | # }
59 | ```
60 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/julien-c/bert-xsmall-dummy/README.md:
--------------------------------------------------------------------------------
 1 | ## How to build a dummy model
 2 | 
 3 | 
 4 | ```python
 5 | from transformers.configuration_bert import BertConfig
 6 | from transformers.modeling_bert import BertForMaskedLM
 7 | from transformers.modeling_tf_bert import TFBertForMaskedLM
 8 | from transformers.tokenization_bert import BertTokenizer
 9 | 
10 | 
11 | SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"
12 | DIRNAME = "./bert-xsmall-dummy"
13 | 
14 | config = BertConfig(10, 20, 1, 1, 40)
15 | 
16 | model = BertForMaskedLM(config)
17 | model.save_pretrained(DIRNAME)
18 | 
19 | tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
20 | tf_model.save_pretrained(DIRNAME)
21 | 
22 | # Slightly different for tokenizer.
23 | # tokenizer = BertTokenizer.from_pretrained(DIRNAME)
24 | # tokenizer.save_pretrained()
25 | ```
26 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/julien-c/dummy-unknown/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | tags:
 3 | - ci
 4 | ---
 5 | 
 6 | ## Dummy model used for unit testing and CI
 7 | 
 8 | 
 9 | ```python
10 | import json
11 | import os
12 | from transformers.configuration_roberta import RobertaConfig
13 | from transformers import RobertaForMaskedLM, TFRobertaForMaskedLM
14 | 
15 | DIRNAME = "./dummy-unknown"
16 | 
17 | 
18 | config = RobertaConfig(10, 20, 1, 1, 40)
19 | 
20 | model = RobertaForMaskedLM(config)
21 | model.save_pretrained(DIRNAME)
22 | 
23 | tf_model = TFRobertaForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
24 | tf_model.save_pretrained(DIRNAME)
25 | 
26 | # Tokenizer:
27 | 
28 | vocab = [
29 |     "l",
30 |     "o",
31 |     "w",
32 |     "e",
33 |     "r",
34 |     "s",
35 |     "t",
36 |     "i",
37 |     "d",
38 |     "n",
39 |     "\u0120",
40 |     "\u0120l",
41 |     "\u0120n",
42 |     "\u0120lo",
43 |     "\u0120low",
44 |     "er",
45 |     "\u0120lowest",
46 |     "\u0120newer",
47 |     "\u0120wider",
48 |     "<unk>",
49 | ]
50 | vocab_tokens = dict(zip(vocab, range(len(vocab))))
51 | merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]
52 | 
53 | vocab_file = os.path.join(DIRNAME, "vocab.json")
54 | merges_file = os.path.join(DIRNAME, "merges.txt")
55 | with open(vocab_file, "w", encoding="utf-8") as fp:
56 |     fp.write(json.dumps(vocab_tokens) + "\n")
57 | with open(merges_file, "w", encoding="utf-8") as fp:
58 |     fp.write("\n".join(merges))
59 | ```
60 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/lysandre/arxiv-nlp/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv-NLP GPT-2 checkpoint
2 | 
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from the computational linguistics field.
4 | 
5 | ## Training data
6 | 
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 80MB of text from the computational linguistics (cs.CL) field.


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/lysandre/arxiv/README.md:
--------------------------------------------------------------------------------
1 | # ArXiv GPT-2 checkpoint
2 | 
3 | This is a GPT-2 small checkpoint for PyTorch. It is the official `gpt2-small` fine-tuned on ArXiv papers from physics fields.
4 | 
5 | ## Training data
6 | 
7 | This model was trained on a subset of ArXiv papers that were parsed from PDF to txt. The resulting data is made of 130MB of text, mostly from quantum physics (quant-ph) and other physics sub-fields.
8 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/severinsimmler/literary-german-bert/kfold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/model_cards/severinsimmler/literary-german-bert/kfold.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/model_cards/severinsimmler/literary-german-bert/prosa-jahre.png


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/setup.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | ensure_newline_before_comments = True
 3 | force_grid_wrap = 0
 4 | include_trailing_comma = True
 5 | known_first_party = transformers
 6 | known_third_party =
 7 |     absl
 8 |     fairseq
 9 |     fastprogress
10 |     git
11 |     h5py
12 |     MeCab
13 |     nltk
14 |     numpy
15 |     packaging
16 |     PIL
17 |     psutil
18 |     pytorch_lightning
19 |     seqeval
20 |     sklearn
21 |     tensorboardX
22 |     tensorflow
23 |     tensorflow_datasets
24 |     torch
25 |     torchtext
26 |     torchvision
27 |     torch_xla
28 | 
29 | line_length = 119
30 | lines_after_imports = 2
31 | multi_line_output = 3
32 | use_parentheses = True
33 | 
34 | [flake8]
35 | ignore = E203, E501, W503
36 | max-line-length = 119
37 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/src/transformers/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from argparse import ArgumentParser
 3 | 
 4 | 
 5 | class BaseTransformersCLICommand(ABC):
 6 |     @staticmethod
 7 |     @abstractmethod
 8 |     def register_subcommand(parser: ArgumentParser):
 9 |         raise NotImplementedError()
10 | 
11 |     @abstractmethod
12 |     def run(self):
13 |         raise NotImplementedError()
14 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/src/transformers/commands/download.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from transformers.commands import BaseTransformersCLICommand
 4 | 
 5 | 
 6 | def download_command_factory(args):
 7 |     return DownloadCommand(args.model, args.cache_dir, args.force)
 8 | 
 9 | 
class DownloadCommand(BaseTransformersCLICommand):
    """CLI command that loads a model and its tokenizer with ``from_pretrained``."""

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Add the ``download`` sub-parser and its arguments.

        Args:
            parser: object exposing ``add_parser`` (the value returned by
                ``ArgumentParser.add_subparsers``).
        """
        download_parser = parser.add_parser("download")
        download_parser.add_argument(
            "--cache-dir", type=str, default=None, help="Path to location to store the models"
        )
        download_parser.add_argument(
            "--force", action="store_true", help="Force the model to be downloaded even if already in cache-dir"
        )
        download_parser.add_argument("model", type=str, help="Name of the model to download")
        # The CLI entry point calls ``args.func(args)`` to build the command.
        download_parser.set_defaults(func=download_command_factory)

    def __init__(self, model: str, cache: str, force: bool):
        """Store the model name, the cache directory and the force flag."""
        self._model = model
        self._cache = cache
        self._force = force

    def run(self):
        """Call ``from_pretrained`` for the model and then for its tokenizer.

        Both calls receive the stored cache directory as ``cache_dir`` and the
        stored flag as ``force_download``.
        """
        # Imported lazily so that registering the sub-command does not require
        # importing the model classes.
        from transformers import AutoModel, AutoTokenizer

        AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
        AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force)
33 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/src/transformers/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
 3 | # module, but to preserve other warnings. So, don't check this module at all.
 4 | 
 5 | from .metrics import is_sklearn_available
 6 | from .processors import (
 7 |     DataProcessor,
 8 |     InputExample,
 9 |     InputFeatures,
10 |     SingleSentenceClassificationProcessor,
11 |     SquadExample,
12 |     SquadFeatures,
13 |     SquadV1Processor,
14 |     SquadV2Processor,
15 |     glue_convert_examples_to_features,
16 |     glue_output_modes,
17 |     glue_processors,
18 |     glue_tasks_num_labels,
19 |     squad_convert_examples_to_features,
20 |     xnli_output_modes,
21 |     xnli_processors,
22 |     xnli_tasks_num_labels,
23 | )
24 | 
25 | 
26 | if is_sklearn_available():
27 |     from .metrics import glue_compute_metrics, xnli_compute_metrics
28 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/src/transformers/data/processors/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # There's no way to ignore "F401 '...' imported but unused" warnings in this
3 | # module, but to preserve other warnings. So, don't check this module at all.
4 | 
5 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels
6 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features
7 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor
8 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels
9 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/src/transformers/tokenization_bart.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2020 The Facebook AI Research Team Authors and The HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .tokenization_roberta import RobertaTokenizer
17 | 
18 | 
# BART uses the same vocab and merges files as RoBERTa (large).
vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json"
merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
_all_bart_models = ["bart-large", "bart-large-mnli", "bart-large-cnn"]


class BartTokenizer(RobertaTokenizer):
    """RoBERTa tokenizer with the lookup tables keyed by the BART model names."""

    # Every listed BART model maps to the same input size and the same files.
    max_model_input_sizes = dict.fromkeys(_all_bart_models, 1024)
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_bart_models, vocab_url),
        "merges_file": dict.fromkeys(_all_bart_models, merges_url),
    }
32 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/templates/adding_a_new_example_script/README.md:
--------------------------------------------------------------------------------
1 | # How to add a new example script in 🤗Transformers
2 | 
3 | This folder provides a template for adding a new example script implementing a training or inference task with the models in the 🤗Transformers library.
4 | 
5 | Currently only examples for PyTorch are provided which are adaptations of the library's SQuAD examples which implement single-GPU and distributed training with gradient accumulation and mixed-precision (using NVIDIA's apex library) to cover a reasonable range of use cases.
6 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/tests/__init__.py


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/fixtures/dummy-config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "model_type": "roberta"
3 | }


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/fixtures/empty.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/tests/fixtures/empty.txt


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/fixtures/input.txt:
--------------------------------------------------------------------------------
1 | Who was Jim Henson ? ||| Jim Henson was a puppeteer
2 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/fixtures/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/tests/fixtures/spiece.model


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/fixtures/test_sentencepiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/models/huggingface/transformers/tests/fixtures/test_sentencepiece.model


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/tests/test_activations.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from transformers import is_torch_available
 4 | 
 5 | from .utils import require_torch
 6 | 
 7 | 
 8 | if is_torch_available():
 9 |     from transformers.activations import _gelu_python, get_activation, gelu_new
10 |     import torch
11 | 
12 | 
@require_torch
class TestActivations(unittest.TestCase):
    """Tests for the activation helpers imported from ``transformers.activations``."""

    def test_gelu_versions(self):
        """``_gelu_python`` matches the "gelu" activation but not ``gelu_new``."""
        inputs = torch.Tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
        registered_gelu = get_activation("gelu")
        reference = _gelu_python(inputs)
        self.assertTrue(torch.eq(reference, registered_gelu(inputs)).all().item())
        self.assertFalse(torch.eq(reference, gelu_new(inputs)).all().item())

    def test_get_activation(self):
        """Known names resolve; unknown names and ``None`` raise ``KeyError``."""
        for known_name in ("swish", "relu", "tanh"):
            get_activation(known_name)
        for unknown_name in ("bogus", None):
            with self.assertRaises(KeyError):
                get_activation(unknown_name)
29 | 


--------------------------------------------------------------------------------
/fairseq/models/huggingface/transformers/transformers-cli:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from argparse import ArgumentParser
 3 | 
 4 | from transformers.commands.convert import ConvertCommand
 5 | from transformers.commands.download import DownloadCommand
 6 | from transformers.commands.env import EnvironmentCommand
 7 | from transformers.commands.run import RunCommand
 8 | from transformers.commands.serving import ServeCommand
 9 | from transformers.commands.user import UserCommands
10 | 
if __name__ == '__main__':
    # Imported here so the early exit below uses sys.exit() instead of the
    # `exit` helper, which is only injected by the `site` module and is
    # missing when the interpreter runs with -S.
    import sys

    parser = ArgumentParser('Transformers CLI tool', usage='transformers-cli <command> [<args>]')
    commands_parser = parser.add_subparsers(help='transformers-cli command helpers')

    # Register commands: each one adds its own sub-parser to commands_parser.
    ConvertCommand.register_subcommand(commands_parser)
    DownloadCommand.register_subcommand(commands_parser)
    EnvironmentCommand.register_subcommand(commands_parser)
    RunCommand.register_subcommand(commands_parser)
    ServeCommand.register_subcommand(commands_parser)
    UserCommands.register_subcommand(commands_parser)

    # Let's go
    args = parser.parse_args()

    # `func` is missing when no sub-command was selected on the command line.
    if not hasattr(args, 'func'):
        parser.print_help()
        sys.exit(1)

    # Run: `func` builds the command object from the parsed arguments.
    service = args.func(args)
    service.run()
33 | 


--------------------------------------------------------------------------------
/fairseq/models/nat/__init__.py:
--------------------------------------------------------------------------------
1 | from .fairseq_nat_model import *
2 | from .nonautoregressive_transformer import *
3 | from .nat_crf_transformer import *
4 | from .iterative_nonautoregressive_transformer import *
5 | from .cmlm_transformer import *
6 | from .levenshtein_transformer import *
7 | from .insertion_transformer import *
8 | 


--------------------------------------------------------------------------------
/fairseq/models/roberta/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .hub_interface import *  # noqa
 7 | from .model import *  # noqa
 8 | from .model_camembert import *  # noqa
 9 | from .model_xlmr import *  # noqa
10 | 


--------------------------------------------------------------------------------
/fairseq/models/roberta/model_xlmr.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | Unsupervised Cross-lingual Representation Learning at Scale
 7 | """
 8 | 
 9 | from fairseq.models import register_model
10 | 
11 | from .hub_interface import RobertaHubInterface
12 | from .model import RobertaModel
13 | 
14 | 
@register_model('xlmr')
class XLMRModel(RobertaModel):
    """``RobertaModel`` subclass registered under the name ``xlmr``."""

    @classmethod
    def hub_models(cls):
        """Return the mapping from model name to downloadable archive URL."""
        archives = {
            'xlmr.base': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz',
            'xlmr.large': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz',
        }
        return archives

    @classmethod
    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
        """Load a checkpoint through ``hub_utils`` and wrap it in a hub interface.

        Extra keyword arguments are forwarded to ``hub_utils.from_pretrained``.
        The returned ``RobertaHubInterface`` wraps the first loaded model.
        """
        from fairseq import hub_utils

        loaded = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs,
        )
        args, task, models = loaded['args'], loaded['task'], loaded['models']
        return RobertaHubInterface(args, task, models[0])
38 | 


--------------------------------------------------------------------------------
/fairseq/models/wav2vec/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .wav2vec import *  # noqa
7 | from .wav2vec2 import *  # noqa
8 | from .wav2vec2_asr import *  # noqa
9 | 


--------------------------------------------------------------------------------
/fairseq/modules/conv_tbc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | from torch.nn.modules.utils import _single
 8 | 
 9 | 
class ConvTBC(torch.nn.Module):
    """1D convolution over an input of shape (time x batch x channel).

    The implementation uses gemm to perform the convolution. This
    implementation is faster than cuDNN for small kernel sizes.

    The weight and bias parameters are allocated but not initialised here.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Stored in the form returned by _single; element 0 is used below.
        self.kernel_size = _single(kernel_size)
        self.padding = _single(padding)

        kernel_width = self.kernel_size[0]
        weight_storage = torch.Tensor(kernel_width, in_channels, out_channels)
        bias_storage = torch.Tensor(out_channels)
        self.weight = torch.nn.Parameter(weight_storage)
        self.bias = torch.nn.Parameter(bias_storage)

    def forward(self, input):
        """Apply ``torch.conv_tbc`` to a contiguous version of ``input``."""
        contiguous_input = input.contiguous()
        return torch.conv_tbc(contiguous_input, self.weight, self.bias, self.padding[0])

    def __repr__(self):
        pieces = [
            '{name}({in_channels}, {out_channels}, kernel_size={kernel_size}',
            ', padding={padding}',
        ]
        if self.bias is None:
            pieces.append(', bias=False')
        pieces.append(')')
        template = ''.join(pieces)
        return template.format(name=self.__class__.__name__, **self.__dict__)
37 | 


--------------------------------------------------------------------------------
/fairseq/modules/dynamicconv_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .dynamicconv_layer import DynamicconvLayer  # noqa
7 | 


--------------------------------------------------------------------------------
/fairseq/modules/dynamicconv_layer/dynamiconv_cpu.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/torch.h>
 2 | #include <vector>
 3 | 
 4 | std::vector<float*> dynamicconv_cpu_forward(
 5 |     float* input,
 6 |     float* filters,
 7 |     int padding_l);
 8 | 
 9 | std::vector<float*> dynamicconv_cpu_backward(
10 |     float* gradOutput,
11 |     int padding_l,
12 |     float* input,
13 |     float* filters);
14 | 
// Forward entry point bound to Python as `forward` at the bottom of this file.
// Passes its arguments unchanged to dynamicconv_cpu_forward, which is only
// declared (not defined) in this file, and returns that function's result.
std::vector<float*> dynamicconv_forward(
    float* input,
    float* filters,
    int padding_l) {

    return dynamicconv_cpu_forward(input, filters, padding_l);
}
22 | 
// Backward entry point bound to Python as `backward` at the bottom of this
// file. Passes its arguments unchanged to dynamicconv_cpu_backward, which is
// only declared (not defined) in this file, and returns that function's result.
std::vector<float*> dynamicconv_backward(
    float* gradOutput,
    int padding_l,
    float* input,
    float* filters) {

    return dynamicconv_cpu_backward(gradOutput, padding_l, input, filters);
}
31 | 
// Python bindings: exposes the two wrappers above as `forward` and `backward`
// on the extension module whose name is given by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("forward", &dynamicconv_forward, "dynamicconv forward (CPU)");
    m.def("backward", &dynamicconv_backward, "dynamicconv backward (CPU)");
}
36 | 


--------------------------------------------------------------------------------
/fairseq/modules/dynamicconv_layer/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from setuptools import setup
 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 9 | 
# The single extension built by this script: the dynamicconv CUDA module.
dynamicconv_cuda_extension = CUDAExtension(
    name='dynamicconv_cuda',
    sources=[
        'dynamicconv_cuda.cpp',
        'dynamicconv_cuda_kernel.cu',
    ],
)

setup(
    name='dynamicconv_layer',
    ext_modules=[dynamicconv_cuda_extension],
    cmdclass={'build_ext': BuildExtension},
)
24 | 


--------------------------------------------------------------------------------
/fairseq/modules/fp32_group_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | Group norm done in fp32 (for fp16 training)
 7 | """
 8 | 
 9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | 
12 | 
class Fp32GroupNorm(nn.GroupNorm):
    """``nn.GroupNorm`` whose forward pass is computed in float32.

    The input and the affine parameters (when present) are converted with
    ``.float()`` and the result is cast back to the type of the input.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        # weight / bias are None when the module was built without affine params.
        weight = None if self.weight is None else self.weight.float()
        bias = None if self.bias is None else self.bias.float()
        normalized = F.group_norm(input.float(), self.num_groups, weight, bias, self.eps)
        return normalized.type_as(input)
26 | 


--------------------------------------------------------------------------------
/fairseq/modules/gelu.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin Gimpel with
 7 | the corresponding GitHub repo: https://github.com/hendrycks/GELUs
 8 | """
 9 | 
10 | import math
11 | 
12 | import torch
13 | import torch.nn as nn
14 | 
15 | 
def gelu_accurate(x):
    """Evaluate ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))``.

    The constant ``sqrt(2 / pi)`` is computed on the first call and cached on
    the function object as ``gelu_accurate._a``.
    """
    try:
        coeff = gelu_accurate._a
    except AttributeError:
        coeff = gelu_accurate._a = math.sqrt(2 / math.pi)
    inner = coeff * (x + 0.044715 * torch.pow(x, 3))
    return 0.5 * x * (1 + torch.tanh(inner))
22 | 
23 | 
def gelu(x: torch.Tensor) -> torch.Tensor:
    """Apply ``torch.nn.functional.gelu`` in float32 and cast back to ``x``'s type."""
    activated = torch.nn.functional.gelu(x.float())
    return activated.type_as(x)
26 | 


--------------------------------------------------------------------------------
/fairseq/modules/grad_multiply.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | class GradMultiply(torch.autograd.Function):
10 |     @staticmethod
11 |     def forward(ctx, x, scale):
12 |         ctx.scale = scale
13 |         res = x.new(x)
14 |         return res
15 | 
16 |     @staticmethod
17 |     def backward(ctx, grad):
18 |         return grad * ctx.scale, None
19 | 


--------------------------------------------------------------------------------
/fairseq/modules/layer_drop.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | LayerDrop as described in https://arxiv.org/abs/1909.11556.
 7 | """
 8 | 
 9 | import torch
10 | import torch.nn as nn
11 | 
12 | 
class LayerDropModuleList(nn.ModuleList):
    """
    A LayerDrop implementation based on :class:`torch.nn.ModuleList`.

    Every iteration over the list draws one fresh uniform random number per
    layer. In training mode a layer is yielded only when its draw is greater
    than ``p``; outside training mode every layer is yielded.

    Usage::

        layers = LayerDropModuleList(p=0.5, modules=[layer1, layer2, layer3])
        for layer in layers:  # this might iterate over layers 1 and 3
            x = layer(x)
        for layer in layers:  # this might iterate over all layers
            x = layer(x)
        for layer in layers:  # this might not iterate over any layers
            x = layer(x)

    Args:
        p (float): probability of dropping out each layer
        modules (iterable, optional): an iterable of modules to add
    """

    def __init__(self, p, modules=None):
        super().__init__(modules)
        self.p = p

    def __iter__(self):
        # One draw per layer, refreshed on every pass over the list.
        draws = torch.empty(len(self)).uniform_()
        for idx, layer in enumerate(super().__iter__()):
            if self.training and not (draws[idx] > self.p):
                continue  # layer dropped for this pass
            yield layer
45 | 


--------------------------------------------------------------------------------
/fairseq/modules/lightconv_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .lightconv_layer import LightconvLayer  # noqa
7 | 


--------------------------------------------------------------------------------
/fairseq/modules/lightconv_layer/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from setuptools import setup
 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 9 | 
# Build configuration for the ``lightconv_layer`` package: a single CUDA
# extension named ``lightconv_cuda`` compiled from one C++ and one CUDA source.
setup(
    name='lightconv_layer',
    ext_modules=[
        CUDAExtension(
            'lightconv_cuda',
            ['lightconv_cuda.cpp', 'lightconv_cuda_kernel.cu'],
        ),
    ],
    cmdclass={'build_ext': BuildExtension},
)
21 | 


--------------------------------------------------------------------------------
/fairseq/modules/positional_embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.nn as nn
 7 | from .learned_positional_embedding import LearnedPositionalEmbedding
 8 | from .sinusoidal_positional_embedding import SinusoidalPositionalEmbedding
 9 | 
10 | 
def PositionalEmbedding(
        num_embeddings: int,
        embedding_dim: int,
        padding_idx: int,
        learned: bool = False,
):
    """Build a positional-embedding module.

    Args:
        num_embeddings: number of positions to support; the table size is
            enlarged by ``padding_idx + 1`` to account for the position offset
        embedding_dim: dimensionality of each position vector
        padding_idx: index reserved for padding
        learned: if True return a :class:`LearnedPositionalEmbedding`,
            otherwise a :class:`SinusoidalPositionalEmbedding`

    Returns:
        the constructed embedding module
    """
    if not learned:
        return SinusoidalPositionalEmbedding(
            embedding_dim, padding_idx, init_size=num_embeddings + padding_idx + 1,
        )

    # if padding_idx is specified then offset the embedding ids by
    # this index and adjust num_embeddings appropriately
    # TODO: The right place for this offset would be inside
    # LearnedPositionalEmbedding. Move this there for a cleaner implementation.
    has_padding = padding_idx is not None
    if has_padding:
        num_embeddings = num_embeddings + padding_idx + 1
    m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
    if has_padding:
        # The padding position gets an all-zero vector.
        nn.init.constant_(m.weight[padding_idx], 0)
    return m
33 | 


--------------------------------------------------------------------------------
/fairseq/modules/quantization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq/modules/quantization/__init__.py


--------------------------------------------------------------------------------
/fairseq/modules/quantization/pq/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .utils import SizeTracker, quantize_model_  # NOQA
7 | 


--------------------------------------------------------------------------------
/fairseq/modules/quantization/pq/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .qconv import PQConv2d  # NOQA
7 | from .qlinear import PQLinear  # NOQA
8 | from .qemb import PQEmbedding  # NOQA
9 | 


--------------------------------------------------------------------------------
/fairseq/modules/quantization/scalar/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from .utils import quantize_model_  # NOQA
7 | 


--------------------------------------------------------------------------------
/fairseq/modules/quantization/scalar/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from .qconv import IntConv2d  # NOQA
 7 | from .qlinear import IntLinear  # NOQA
 8 | from .qemb import IntEmbedding  # NOQA
 9 | from .qact import ActivationQuantizer  # NOQA
10 | 


--------------------------------------------------------------------------------
/fairseq/modules/same_pad.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | from torch import nn
 8 | 
 9 | 
class SamePad(nn.Module):
    """Drop the last element of the final dimension when ``kernel_size`` is even.

    Args:
        kernel_size (int): kernel size; only its parity is used
    """

    def __init__(self, kernel_size):
        super().__init__()
        # True when the kernel size is even, i.e. one trailing element is cut.
        self.remove = kernel_size % 2 == 0

    def forward(self, x):
        """Return ``x[:, :, :-1]`` if trimming is enabled, otherwise ``x``."""
        return x[:, :, :-1] if self.remove else x
19 | 


--------------------------------------------------------------------------------
/fairseq/modules/scalar_bias.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | #
 6 | 
 7 | import torch
 8 | 
 9 | 
class ScalarBias(torch.autograd.Function):
    """
    Adds a vector of scalars, used in self-attention mechanism to allow
    the model to optionally attend to this vector instead of the past
    """

    @staticmethod
    def forward(ctx, input, dim, bias_init):
        # The output is one element longer than the input along ``dim``.
        out_size = list(input.size())
        out_size[dim] += 1
        output = input.new_empty(out_size).fill_(bias_init)
        # Slot 0 along ``dim`` keeps ``bias_init``; the rest is the input.
        output.narrow(dim, 1, out_size[dim] - 1).copy_(input)
        ctx.dim = dim
        return output

    @staticmethod
    def backward(ctx, grad):
        # Skip the gradient of the prepended slot; ``dim`` and ``bias_init``
        # receive no gradient.
        input_grad = grad.narrow(ctx.dim, 1, grad.size(ctx.dim) - 1)
        return input_grad, None, None


def scalar_bias(input, dim, bias_init=0):
    """Prepend a slot filled with ``bias_init`` to ``input`` along ``dim``."""
    return ScalarBias.apply(input, dim, bias_init)
32 | 


--------------------------------------------------------------------------------
/fairseq/modules/transpose_last.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | """
 6 | transpose last 2 dimensions of the input
 7 | """
 8 | 
 9 | import torch.nn as nn
10 | 
11 | 
class TransposeLast(nn.Module):
    """Swap the last two dimensions of the input.

    Args:
        deconstruct_idx (optional): when not None, the input is first indexed
            with this value (``x[deconstruct_idx]``) and the result is
            transposed
    """

    def __init__(self, deconstruct_idx=None):
        super().__init__()
        self.deconstruct_idx = deconstruct_idx

    def forward(self, x):
        idx = self.deconstruct_idx
        selected = x if idx is None else x[idx]
        return selected.transpose(-2, -1)
21 | 


--------------------------------------------------------------------------------
/fairseq/modules/unfold.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.nn.functional as F
 7 | 
 8 | 
 9 | def unfold1d(x, kernel_size, padding_l, pad_value=0):
10 |     '''unfold T x B x C to T x B x C x K'''
11 |     if kernel_size > 1:
12 |         T, B, C = x.size()
13 |         x = F.pad(x, (0, 0, 0, 0, padding_l, kernel_size - 1 - padding_l), value=pad_value)
14 |         x = x.as_strided((T, B, C, kernel_size), (B*C, C, 1, B*C))
15 |     else:
16 |         x = x.unsqueeze(3)
17 |     return x
18 | 


--------------------------------------------------------------------------------
/fairseq/optim/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.optim.fairseq_optimizer import FairseqOptimizer, LegacyFairseqOptimizer # noqa
11 | from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer
12 | from fairseq.optim.bmuf import FairseqBMUF  # noqa
13 | from fairseq.optim.shard import shard_
14 | 
15 | 
16 | __all__ = [
17 |     'FairseqOptimizer',
18 |     'FP16Optimizer',
19 |     'MemoryEfficientFP16Optimizer',
20 |     'shard_',
21 | ]
22 | 
23 | 
# Set up the registry behind the '--optimizer' option. The call yields three
# objects, bound here as the builder used by build_optimizer() below, the
# decorator optimizer modules apply to their classes, and the registry itself.
# NOTE(review): the exact semantics of ``base_class`` / ``required`` live in
# ``registry.setup_registry`` — confirm there.
_build_optimizer, register_optimizer, OPTIMIZER_REGISTRY = registry.setup_registry(
    '--optimizer',
    base_class=FairseqOptimizer,
    required=True,
)
29 | 
30 | 
def build_optimizer(args, params, *extra_args, **extra_kwargs):
    """Build the optimizer selected by ``args`` for the trainable parameters.

    Args:
        args: parsed options, passed through to the registry builder
        params: iterable of parameters, or iterable of dicts whose values are
            parameters (the dicts are flattened). One-shot iterators such as
            ``model.parameters()`` are accepted.
        *extra_args, **extra_kwargs: forwarded unchanged to the builder

    Returns:
        whatever the registry builder returns for the parameters that have
        ``requires_grad`` set
    """
    # Materialize first: the ``all(...)`` probe below iterates ``params``, and
    # with a one-shot iterator it would consume elements that the filtering
    # step afterwards would never see.
    params = list(params)
    if all(isinstance(p, dict) for p in params):
        params = [t for p in params for t in p.values()]
    params = [p for p in params if p.requires_grad]
    return _build_optimizer(args, params, *extra_args, **extra_kwargs)
36 | 
37 | 
# Import every public ``*.py`` module that sits next to this file (names
# starting with '_' are skipped) as ``fairseq.optim.<module>``.
for file in os.listdir(os.path.dirname(__file__)):
    if not file.endswith('.py') or file.startswith('_'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('fairseq.optim.' + module)
43 | 


--------------------------------------------------------------------------------
/fairseq/optim/adagrad.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import torch.optim
 7 | 
 8 | from . import register_optimizer, LegacyFairseqOptimizer
 9 | 
10 | 
@register_optimizer('adagrad')
class Adagrad(LegacyFairseqOptimizer):
    """Optimizer registered as 'adagrad', backed by :class:`torch.optim.Adagrad`."""

    def __init__(self, args, params):
        super().__init__(args)
        config = self.optimizer_config
        self._optimizer = torch.optim.Adagrad(params, **config)

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        # fmt: off
        parser.add_argument(
            '--weight-decay', '--wd',
            default=0.0, type=float, metavar='WD', help='weight decay',
        )
        # fmt: on

    @property
    def optimizer_config(self):
        """
        Return a kwarg dictionary that will be used to override optimizer
        args stored in checkpoints. This allows us to load a checkpoint and
        resume training using a different set of optimizer args, e.g., with a
        different learning rate.
        """
        # Only the first entry of ``args.lr`` is used.
        return dict(
            lr=self.args.lr[0],
            weight_decay=self.args.weight_decay,
        )

    @property
    def supports_flat_params(self):
        return True
41 | 


--------------------------------------------------------------------------------
/fairseq/optim/lr_scheduler/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import importlib
 7 | import os
 8 | 
 9 | from fairseq import registry
10 | from fairseq.optim.lr_scheduler.fairseq_lr_scheduler import FairseqLRScheduler, LegacyFairseqLRScheduler # noqa
11 | 
12 | 
# Set up the registry behind the '--lr-scheduler' option, with 'fixed' passed
# as the default. The call yields three objects, bound here as the builder
# function, the registration decorator and the registry itself.
# NOTE(review): the exact semantics of ``base_class`` / ``default`` live in
# ``registry.setup_registry`` — confirm there.
build_lr_scheduler, register_lr_scheduler, LR_SCHEDULER_REGISTRY = registry.setup_registry(
    '--lr-scheduler',
    base_class=FairseqLRScheduler,
    default='fixed',
)
18 | 
# Import every public ``*.py`` module that sits next to this file (names
# starting with '_' are skipped) as ``fairseq.optim.lr_scheduler.<module>``.
for file in os.listdir(os.path.dirname(__file__)):
    if not file.endswith('.py') or file.startswith('_'):
        continue
    module = file[:file.find('.py')]
    importlib.import_module('fairseq.optim.lr_scheduler.' + module)
24 | 


--------------------------------------------------------------------------------
/fairseq/optim/shard.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | try:
 8 |     from fairscale.optim import OSS
 9 |     _has_fairscale = True
10 | except ImportError:
11 |     _has_fairscale = False
12 | 
13 | 
def shard_(args, optimizer):
    """Replace ``optimizer.optimizer`` in place with a fairscale ``OSS`` wrapper.

    The wrapper is built from the parameter groups and class of the current
    inner optimizer plus ``optimizer.optimizer_config``.

    Args:
        args: unused by this function
        optimizer: object exposing ``optimizer`` and ``optimizer_config``

    Raises:
        ImportError: if the fairscale package could not be imported
    """
    if not _has_fairscale:
        raise ImportError(
            '\n\nPlease install the fairscale package:'
            '\n\n  pip install fairscale'
        )

    class FairseqOSS(OSS):
        @property
        def disable_mem_eff_fp16_loading_hack(self):
            return True

        def __getattr__(self, name):
            # Forward ``supports*`` lookups to the wrapped optimizer when it
            # defines them; every other missing attribute is an error.
            forwardable = name.startswith("supports") and hasattr(self.optim, name)
            if not forwardable:
                raise AttributeError("'FairseqOSS' object has no attribute {0!r}".format(name))
            return getattr(self.optim, name)

    inner = optimizer.optimizer
    optimizer.optimizer = FairseqOSS(
        inner.param_groups, type(inner), **optimizer.optimizer_config
    )
34 | 


--------------------------------------------------------------------------------
/fairseq/pdb.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import multiprocessing
 7 | import os
 8 | import pdb
 9 | import sys
10 | 
11 | 
__all__ = ['set_trace']


# One-element list used as a mutable slot: it caches the file object that
# MultiprocessingPdb._cmdloop opens from ``_stdin_fd`` on first use.
_stdin = [None]
# Held for the whole duration of a debugger command loop.
_stdin_lock = multiprocessing.Lock()
# File descriptor of stdin captured at import time; None when it cannot be
# obtained (``fileno()`` raised).
try:
    _stdin_fd = sys.stdin.fileno()
except Exception:
    _stdin_fd = None
21 | 
22 | 
class MultiprocessingPdb(pdb.Pdb):
    """A Pdb wrapper that works in a multiprocessing environment.

    Usage: `from fairseq import pdb; pdb.set_trace()`

    The command loop runs while holding the module-level ``_stdin_lock`` and,
    when a stdin file descriptor was captured at import time, reads from a
    file object opened on that descriptor.
    """

    def __init__(self):
        pdb.Pdb.__init__(self, nosigint=True)

    def _cmdloop(self):
        # Remember the current stdin so it can be restored afterwards.
        stdin_bak = sys.stdin
        with _stdin_lock:
            try:
                if _stdin_fd is not None:
                    # Open the captured descriptor once and reuse the file
                    # object on later calls.
                    if not _stdin[0]:
                        _stdin[0] = os.fdopen(_stdin_fd)
                    sys.stdin = _stdin[0]
                self.cmdloop()
            finally:
                # Restore stdin even if the command loop raised.
                sys.stdin = stdin_bak
43 | 
44 | 
def set_trace():
    """Start a :class:`MultiprocessingPdb` session in the caller's frame."""
    caller_frame = sys._getframe().f_back
    debugger = MultiprocessingPdb()
    debugger.set_trace(caller_frame)
48 | 


--------------------------------------------------------------------------------
/fairseq/scoring/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | import importlib
 8 | import os
 9 | 
10 | from fairseq import registry
11 | 
12 | 
# Set up the registry behind the '--scoring' option, with 'bleu' passed as the
# default. The call yields three objects, bound here as the builder used by
# build_scorer() below, the registration decorator and the registry itself.
# NOTE(review): the exact semantics of ``default`` live in
# ``registry.setup_registry`` — confirm there.
_build_scoring, register_scoring, SCORING_REGISTRY = registry.setup_registry(
    "--scoring", default="bleu"
)
16 | 
17 | 
def build_scorer(args, tgt_dict):
    """Create the scorer selected by ``args``.

    If ``args.sacrebleu`` is set, a deprecation warning is issued and
    ``args.scoring`` is overwritten with ``"sacrebleu"``. The ``"bleu"``
    scorer is built directly from the pad/eos/unk indices of ``tgt_dict``;
    every other value is handed to the scoring registry.

    Args:
        args: parsed options; reads ``sacrebleu`` and ``scoring``
        tgt_dict: target dictionary exposing ``pad()``, ``eos()`` and ``unk()``
    """
    from fairseq import utils

    if args.sacrebleu:
        utils.deprecation_warning(
            "--sacrebleu is deprecated. Please use --scoring sacrebleu instead."
        )
        args.scoring = "sacrebleu"

    if args.scoring != "bleu":
        return _build_scoring(args)

    from fairseq.scoring import bleu
    return bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
31 | 
32 | 
# Import every public ``*.py`` module that sits next to this file (names
# starting with "_" are skipped) as ``fairseq.scoring.<module>``.
for file in os.listdir(os.path.dirname(__file__)):
    if not file.endswith(".py") or file.startswith("_"):
        continue
    module = file[: file.find(".py")]
    importlib.import_module("fairseq.scoring." + module)
38 | 


--------------------------------------------------------------------------------
/fairseq/scoring/wer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.scoring import register_scoring
 7 | 
 8 | 
 9 | @register_scoring("wer")
10 | class WerScorer(object):
11 |     def __init__(self, *unused):
12 |         self.reset()
13 | 
14 |     def reset(self):
15 |         self.distance = 0
16 |         self.ref_length = 0
17 | 
18 |     def add_string(self, ref, pred):
19 |         import editdistance
20 |         ref_items = ref.split()
21 |         pred_items = pred.split()
22 |         self.distance += editdistance.eval(ref_items, pred_items)
23 |         self.ref_length += len(ref_items)
24 | 
25 |     def result_string(self):
26 |         return f"WER: {self.score()}"
27 | 
28 |     def score(self):
29 |         return (
30 |             100.0 * self.distance / self.ref_length if self.ref_length > 0 else 0
31 |         )
32 | 


--------------------------------------------------------------------------------
/fairseq/tasks/translation_from_pretrained_xlm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from fairseq.data.legacy.masked_lm_dictionary import MaskedLMDictionary
 7 | from fairseq.tasks.translation import TranslationTask
 8 | 
 9 | from . import register_task
10 | 
11 | 
@register_task("translation_from_pretrained_xlm")
class TranslationFromPretrainedXLMTask(TranslationTask):
    """
    Same as TranslationTask except use the MaskedLMDictionary class so that
    we can load data that was binarized with the MaskedLMDictionary class.

    This task should be used for the entire training pipeline when we want to
    train an NMT model from a pretrained XLM checkpoint: binarizing NMT data,
    training NMT with the pretrained XLM checkpoint, and subsequent evaluation
    of that trained model.
    """

    @classmethod
    def load_dictionary(cls, filename):
        """Load the masked LM dictionary from the filename

        Args:
            filename (str): the filename

        Returns:
            the result of ``MaskedLMDictionary.load(filename)``
        """
        masked_lm_dictionary = MaskedLMDictionary.load(filename)
        return masked_lm_dictionary
32 | 


--------------------------------------------------------------------------------
/fairseq/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import re
 7 | 
 8 | SPACE_NORMALIZER = re.compile(r"\s+")
 9 | 
10 | 
11 | def tokenize_line(line):
12 |     line = SPACE_NORMALIZER.sub(" ", line)
13 |     line = line.strip()
14 |     return line.split()
15 | 


--------------------------------------------------------------------------------
/fairseq_cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/fairseq_cli/__init__.py


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/scripts/__init__.py


--------------------------------------------------------------------------------
/scripts/compare_namespaces.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Helper script to compare two argparse.Namespace objects."""
 3 | 
 4 | from argparse import Namespace  # noqa
 5 | 
 6 | 
 7 | def main():
 8 | 
 9 |     ns1 = eval(input('Namespace 1: '))
10 |     ns2 = eval(input('Namespace 2: '))
11 | 
12 |     def keys(ns):
13 |         ks = set()
14 |         for k in dir(ns):
15 |             if not k.startswith('_'):
16 |                 ks.add(k)
17 |         return ks
18 | 
19 |     k1 = keys(ns1)
20 |     k2 = keys(ns2)
21 | 
22 |     def print_keys(ks, ns1, ns2=None):
23 |         for k in ks:
24 |             if ns2 is None:
25 |                 print('{}\t{}'.format(k, getattr(ns1, k, None)))
26 |             else:
27 |                 print('{}\t{}\t{}'.format(k, getattr(ns1, k, None), getattr(ns2, k, None)))
28 | 
29 |     print('Keys unique to namespace 1:')
30 |     print_keys(k1 - k2, ns1)
31 |     print()
32 | 
33 |     print('Keys unique to namespace 2:')
34 |     print_keys(k2 - k1, ns2)
35 |     print()
36 | 
37 |     print('Overlapping keys with different values:')
38 |     ks = [k for k in k1 & k2 if getattr(ns1, k, 'None') != getattr(ns2, k, 'None')]
39 |     print_keys(ks, ns1, ns2)
40 |     print()
41 | 
42 | 
43 | if __name__ == '__main__':
44 |     main()
45 | 


--------------------------------------------------------------------------------
/scripts/compound_split_bleu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 1 ]; then
 4 |     echo "usage: $0 GENERATE_PY_OUTPUT"
 5 |     exit 1
 6 | fi
 7 | 
 8 | GEN=$1
 9 | 
10 | SYS=$GEN.sys
11 | REF=$GEN.ref
12 | 
13 | if [ $(tail -n 1 $GEN | grep BLEU | wc -l) -ne 1 ]; then
14 |     echo "not done generating"
15 |     exit
16 | fi
17 | 
18 | grep ^H $GEN | awk -F '\t' '{print $NF}' | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $SYS
19 | grep ^T $GEN | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $REF
20 | fairseq-score --sys $SYS --ref $REF
21 | 


--------------------------------------------------------------------------------
/scripts/constraints/validate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # Copyright (c) Facebook, Inc. and its affiliates.
 4 | #
 5 | # This source code is licensed under the MIT license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import sys
 9 | 
10 | """Reads in a fairseq output file, and verifies that the constraints
11 | (C- lines) are present in the output (the first H- line). Assumes that
12 | constraints are listed prior to the first hypothesis.
13 | """
14 | 
def _count_constraints(lines):
    """Count how many pending constraints appear in each hypothesis line.

    ``C-`` lines contribute their second tab-separated field as a constraint.
    On an ``H-`` line, every pending constraint is looked up as a substring
    of the line's third tab-separated field, a miss is reported on stderr,
    and the pending list is cleared.

    Returns:
        tuple ``(found, total)``
    """
    constraints = []
    found = 0
    total = 0
    for line in lines:
        if line.startswith("C-"):
            constraints.append(line.rstrip().split("\t")[1])
        elif line.startswith("H-"):
            text = line.split("\t")[2]

            for constraint in constraints:
                total += 1
                if constraint in text:
                    found += 1
                else:
                    print(f"No {constraint} in {text}", file=sys.stderr)

            constraints = []
    return found, total


def main():
    """Validate the fairseq output read from stdin and print a summary line."""
    found, total = _count_constraints(sys.stdin)
    # With no constraints at all the ratio is undefined; report 0.0% instead
    # of crashing with ZeroDivisionError.
    percent = 100 * found / total if total else 0.0
    print(f"Found {found} / {total} = {percent:.1f}%")


if __name__ == "__main__":
    main()
34 | 


--------------------------------------------------------------------------------
/scripts/convert_dictionary.lua:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) Facebook, Inc. and its affiliates.
 2 | --
 3 | -- This source code is licensed under the MIT license found in the
 4 | -- LICENSE file in the root directory of this source tree.
 5 | --
 6 | -- Usage: convert_dictionary.lua <dict.th7>
 7 | require 'fairseq'
 8 | require 'torch'
 9 | require 'paths'
10 | 
if #arg < 1 then
   print('usage: convert_dictionary.lua <dict.th7>')
   os.exit(1)
end
if not paths.filep(arg[1]) then
   print('error: file does not exist: ' .. arg[1])
   os.exit(1)
end

local dict = torch.load(arg[1])
-- Replace only a trailing ".th7" extension. '%.' matches a literal dot and
-- '$' anchors at the end, so names that merely contain "th7" are untouched.
-- gsub also returns a match count; only the resulting string is kept.
local dst = paths.basename(arg[1]):gsub('%.th7$', '.txt')
assert(dst:match('%.txt$'))

-- The output is written to the current directory under the converted name.
local f, err = io.open(dst, 'w')
if not f then
   print('error: cannot open ' .. dst .. ' for writing: ' .. tostring(err))
   os.exit(1)
end
-- One "<symbol> <frequency>" line per entry, stopping after dict.cutoff.
for idx, symbol in ipairs(dict.index_to_symbol) do
  if idx > dict.cutoff then
    break
  end
  f:write(symbol)
  f:write(' ')
  f:write(dict.index_to_freq[idx])
  f:write('\n')
end
f:close()
35 | 


--------------------------------------------------------------------------------
/scripts/read_binarized.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import argparse
 8 | 
 9 | from fairseq.data import data_utils, Dictionary, indexed_dataset
10 | 
11 | 
def get_parser():
    """Create the command-line parser for this script.

    Options: ``--dataset-impl`` (restricted to the implementations reported
    by ``indexed_dataset``), ``--dict`` (optional dictionary file) and
    ``--input`` (required binarized file).
    """
    parser = argparse.ArgumentParser(
        description='writes text from binarized file to stdout')
    impl_choices = indexed_dataset.get_available_dataset_impl()
    # fmt: off
    parser.add_argument('--dataset-impl', help='dataset implementation', choices=impl_choices)
    parser.add_argument('--dict', metavar='FP', help='dictionary containing known words', default=None)
    parser.add_argument('--input', metavar='FP', required=True, help='binarized file to read')
    # fmt: on
    return parser
23 | 
24 | 
def main():
    """Print every entry of a binarized dataset to stdout, one per line.

    With ``--dict`` each entry is rendered through ``Dictionary.string``;
    without it the entry's values are printed as space-separated integers.
    """
    args = get_parser().parse_args()

    dictionary = None
    if args.dict is not None:
        dictionary = Dictionary.load(args.dict)

    dataset = data_utils.load_indexed_dataset(
        args.input,
        dictionary,
        dataset_impl=args.dataset_impl,
        default='lazy',
    )

    for tensor_line in dataset:
        if dictionary is not None:
            line = dictionary.string(tensor_line)
        else:
            line = ' '.join(str(int(x)) for x in tensor_line)
        print(line)
44 | 
45 | 
46 | if __name__ == '__main__':
47 |     main()
48 | 


--------------------------------------------------------------------------------
/scripts/sacrebleu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 TESTSET SRCLANG TGTLANG GEN"
 5 |     exit 1
 6 | fi
 7 | 
 8 | TESTSET=$1
 9 | SRCLANG=$2
10 | TGTLANG=$3
11 | 
12 | GEN=$4
13 | 
14 | if ! command -v sacremoses &> /dev/null
15 | then
16 |     echo "sacremoses could not be found, please install with: pip install sacremoses"
17 |     exit
18 | fi
19 | 
20 | grep ^H $GEN \
21 | | sed 's/^H\-//' \
22 | | sort -n -k 1 \
23 | | cut -f 3 \
24 | | sacremoses detokenize \
25 | > $GEN.sorted.detok
26 | 
27 | sacrebleu --test-set $TESTSET --language-pair "${SRCLANG}-${TGTLANG}" < $GEN.sorted.detok
28 | 


--------------------------------------------------------------------------------
/scripts/spm_train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | from __future__ import absolute_import, division, print_function, unicode_literals
 9 | 
10 | import sys
11 | 
12 | import sentencepiece as spm
13 | 
14 | 
if __name__ == "__main__":
    # Hand all command-line arguments, joined by single spaces, to the
    # SentencePiece trainer as one flag string.
    trainer_flags = " ".join(sys.argv[1:])
    spm.SentencePieceTrainer.Train(trainer_flags)
17 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/tests/__init__.py


--------------------------------------------------------------------------------
/tests/gpu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/tests/gpu/__init__.py


--------------------------------------------------------------------------------
/tests/gpu/transformer_quantization_config.yaml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | # This file defines example configuration arguments for quantizing
 7 | # a transformer model with product quantization
 8 | 
 9 | n_centroids:
10 |     Linear:
11 |         key: in_features
12 |         value: {"*": 8}
13 |     Embedding:
14 |         key: embedding_dim
15 |         value: {"*": 8}
16 | 
17 | block_sizes:
18 |   Linear:
19 |       key: fuzzy_name
20 |       value: {fc: 8, attn: 4, emb: 4}
21 |   Embedding:
22 |       key: fuzzy_name
23 |       value: {emb: 8}
24 | 
25 | layers_to_quantize:
26 |     - decoder\\.layers\\.\d+\\.fc[12]
27 |     - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
28 |     - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
29 | 


--------------------------------------------------------------------------------
/tests/speech_recognition/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shuo-git/VecConstNMT/c75004300ebf6dd16e139043348e206c3b6ab562/tests/speech_recognition/__init__.py


--------------------------------------------------------------------------------
/tests/speech_recognition/test_cross_entropy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from examples.speech_recognition.criterions.cross_entropy_acc import CrossEntropyWithAccCriterion
 8 | from .asr_test_base import CrossEntropyCriterionTestBase
 9 | 
10 | 
class CrossEntropyWithAccCriterionTest(CrossEntropyCriterionTestBase):
    """Checks the counters logged by ``CrossEntropyWithAccCriterion``."""

    def setUp(self):
        # The criterion class is assigned before the base-class setUp runs.
        # NOTE(review): presumably the base fixture builds self.criterion
        # from this attribute; confirm in asr_test_base.
        self.criterion_cls = CrossEntropyWithAccCriterion
        super().setUp()

    def _logging_output_for(self, correct):
        """Run the criterion on a hard-target, non-aggregated test sample.

        Args:
            correct: forwarded to ``get_test_sample`` as its ``correct`` flag.

        Returns:
            The third element of the criterion's result (the logging output).
        """
        sample = self.get_test_sample(
            correct=correct, soft_target=False, aggregate=False
        )
        _, _, logging_output = self.criterion(
            self.model, sample, "sum", log_probs=True
        )
        return logging_output

    def test_cross_entropy_all_correct(self):
        stats = self._logging_output_for(correct=True)
        assert stats["correct"] == 20
        assert stats["total"] == 20
        assert stats["sample_size"] == 20
        assert stats["ntokens"] == 20

    def test_cross_entropy_all_wrong(self):
        stats = self._logging_output_for(correct=False)
        assert stats["correct"] == 0
        assert stats["total"] == 20
        assert stats["sample_size"] == 20
        assert stats["ntokens"] == 20
35 | 


--------------------------------------------------------------------------------
/tests/speech_recognition/test_data_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | import unittest
 7 | 
 8 | import torch
 9 | 
10 | from examples.speech_recognition.data import data_utils
11 | 
12 | 
class DataUtilsTest(unittest.TestCase):
    """Tests for ``data_utils.apply_mv_norm``."""

    def test_normalization(self):
        """A single-row input must come back NaN-free and unchanged."""
        row = [
            -0.7661, -1.3889, -2.0972, -0.9134, -0.7071, -0.9765, -0.8700, -0.8283,
            0.7512, 1.3211, 2.1532, 2.1174, 1.2800, 1.2633, 1.6147, 1.6322,
            2.0723, 3.1522, 3.2852, 2.2309, 2.5569, 2.2183, 2.2862, 1.5886,
            0.8773, 0.8725, 1.2662, 0.9899, 1.1069, 1.3926, 1.2795, 1.1199,
            1.1477, 1.2687, 1.3843, 1.1903, 0.8355, 1.1367, 1.2639, 1.4707,
        ]
        # Wrap the 40 values in an outer list so the tensor has one row.
        features = torch.tensor([row])
        normalized = data_utils.apply_mv_norm(features)
        assert not torch.any(torch.isnan(normalized))
        assert torch.all(normalized == features)
24 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3 -u
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | """
 7 | Legacy entry point. Use fairseq_cli/train.py or fairseq-train instead.
 8 | """
 9 | 
10 | from fairseq_cli.train import cli_main
11 | 
12 | 
if __name__ == "__main__":
    # Running this file as a script delegates to the fairseq_cli.train
    # entry point imported above.
    cli_main()
15 | 


--------------------------------------------------------------------------------