├── .dockerignore
├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── feature_request.md
    │   └── question.md
├── .gitignore
├── .readthedocs.yml
├── CONTRIBUTING.md
├── Dockerfile
├── Jenkinsfile
├── README.rst
├── docs
    ├── .nojekyll
    ├── Makefile
    ├── source
    │   ├── api-docs
    │   │   └── nemo.rst
    │   ├── asr
    │   │   ├── api.rst
    │   │   ├── asr_all.bib
    │   │   ├── ctc_asr.png
    │   │   ├── datasets.rst
    │   │   ├── intro.rst
    │   │   ├── jasper_layers.png
    │   │   ├── jasper_vertical.png
    │   │   ├── models.rst
    │   │   └── quartz_vertical.png
    │   ├── common
    │   │   └── intro.rst
    │   ├── conf.py
    │   ├── core.rst
    │   ├── cv
    │   │   └── intro.rst
    │   ├── index.rst
    │   ├── nlp
    │   │   ├── api.rst
    │   │   ├── intro.rst
    │   │   └── models.rst
    │   ├── nvidia_theme
    │   │   ├── __init__.py
    │   │   ├── breadcrumbs.html
    │   │   ├── footer.html
    │   │   ├── layout.html
    │   │   ├── layout_base.html
    │   │   ├── search.html
    │   │   ├── searchbox.html
    │   │   ├── static
    │   │   │   ├── css
    │   │   │   │   ├── badge_only.css
    │   │   │   │   ├── nvidia_theme.css
    │   │   │   │   └── theme.css
    │   │   │   ├── fonts
    │   │   │   │   ├── Lato
    │   │   │   │   │   ├── fonts.css
    │   │   │   │   │   ├── lato-bold.ttf
    │   │   │   │   │   ├── lato-bold.woff
    │   │   │   │   │   ├── lato-bold.woff2
    │   │   │   │   │   ├── lato-regular.ttf
    │   │   │   │   │   ├── lato-regular.woff
    │   │   │   │   │   └── lato-regular.woff2
    │   │   │   │   ├── fontawesome-webfont.eot
    │   │   │   │   ├── fontawesome-webfont.svg
    │   │   │   │   ├── fontawesome-webfont.ttf
    │   │   │   │   ├── fontawesome-webfont.woff
    │   │   │   │   └── fontawesome-webfont.woff2
    │   │   │   ├── images
    │   │   │   │   ├── favicon.ico
    │   │   │   │   └── nvidia_logo.png
    │   │   │   └── js
    │   │   │   │   ├── modernizr.min.js
    │   │   │   │   └── theme.js
    │   │   ├── theme.conf
    │   │   └── versions.html
    │   └── tts
    │   │   ├── intro.rst
    │   │   ├── models.rst
    │   │   ├── squeezewave_wn.png
    │   │   └── waveglow.png
    └── update_docs.sh
├── examples
    ├── asr
    │   ├── conf
    │   │   ├── config.yaml
    │   │   ├── jasper_10x5dr.yaml
    │   │   ├── matchboxnet_3x1x64_v1.yaml
    │   │   ├── matchboxnet_3x1x64_v2.yaml
    │   │   ├── matchboxnet_3x1x64_vad.yaml
    │   │   ├── quartznet_15x5.yaml
    │   │   └── quartznet_15x5_zh.yaml
    │   ├── experimental
    │   │   └── configs
    │   │   │   ├── config_bpe.yaml
    │   │   │   ├── contextnet_bpe
    │   │   │       ├── contextnet_192_2x_stride.yaml
    │   │   │       ├── contextnet_192_4x_stride.yaml
    │   │   │       └── contextnet_192_8x_stride.yaml
    │   │   │   └── quartznet_15x5_aug.yaml
    │   ├── speech_to_label.py
    │   ├── speech_to_text.py
    │   ├── speech_to_text_bpe.py
    │   └── speech_to_text_infer.py
    ├── cv
    │   └── mnist_lenet5_image_classification_pure_lightning.py
    ├── nlp
    │   ├── glue_benchmark
    │   │   ├── glue_benchmark.py
    │   │   └── glue_benchmark_config.yaml
    │   ├── intent_slot_classification
    │   │   ├── conf
    │   │   │   └── intent_slot_classification_config.yaml
    │   │   ├── data
    │   │   │   ├── assistant_utils.py
    │   │   │   └── import_datasets.py
    │   │   └── intent_slot_classification.py
    │   ├── language_modeling
    │   │   ├── bert_pretraining.py
    │   │   ├── conf
    │   │   │   ├── bert_pretraining_from_preprocessed_config.yaml
    │   │   │   ├── bert_pretraining_from_text_config.yaml
    │   │   │   └── transformer_lm_config.yaml
    │   │   ├── convert_weights_to_nemo1.0.py
    │   │   ├── get_wkt2.sh
    │   │   └── transformer_lm.py
    │   ├── question_answering
    │   │   ├── conf
    │   │   │   └── question_answering_squad_config.yaml
    │   │   ├── get_squad.py
    │   │   └── question_answering_squad.py
    │   ├── text_classification
    │   │   ├── conf
    │   │   │   └── text_classification_config.yaml
    │   │   ├── data
    │   │   │   └── import_datasets.py
    │   │   └── text_classification_with_bert.py
    │   └── token_classification
    │   │   ├── conf
    │   │       ├── punctuation_capitalization_config.yaml
    │   │       └── token_classification_config.yaml
    │   │   ├── data
    │   │       ├── get_tatoeba_data.py
    │   │       └── import_from_iob_format.py
    │   │   ├── punctuation_capitalization.py
    │   │   └── token_classification.py
    ├── speaker_recognition
    │   ├── conf
    │   │   ├── SpeakerNet_recognition_3x2x512.yaml
    │   │   └── SpeakerNet_verification_3x2x512.yaml
    │   ├── speaker_reco.py
    │   ├── spkr_get_emb.py
    │   └── voxceleb_eval.py
    └── tts
    │   ├── conf
    │       ├── glow_tts.yaml
    │       ├── squeezewave.yaml
    │       ├── tacotron2.yaml
    │       └── waveglow.yaml
    │   ├── glow_tts.py
    │   ├── squeezewave.py
    │   ├── tacotron2.py
    │   ├── test_tts_infer.py
    │   └── waveglow.py
├── external
    ├── get_collections.py
    └── get_modules.py
├── nemo
    ├── README.md
    ├── __init__.py
    ├── collections
    │   ├── __init__.py
    │   ├── asr
    │   │   ├── __init__.py
    │   │   ├── data
    │   │   │   ├── __init__.py
    │   │   │   ├── audio_to_label.py
    │   │   │   └── audio_to_text.py
    │   │   ├── losses
    │   │   │   ├── __init__.py
    │   │   │   ├── angularloss.py
    │   │   │   └── ctc.py
    │   │   ├── metrics
    │   │   │   ├── __init__.py
    │   │   │   ├── wer.py
    │   │   │   └── wer_bpe.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── asr_model.py
    │   │   │   ├── classification_models.py
    │   │   │   ├── ctc_bpe_models.py
    │   │   │   ├── ctc_models.py
    │   │   │   └── label_models.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   ├── audio_preprocessing.py
    │   │   │   └── conv_asr.py
    │   │   └── parts
    │   │   │   ├── __init__.py
    │   │   │   ├── cleaners.py
    │   │   │   ├── collections.py
    │   │   │   ├── features.py
    │   │   │   ├── jasper.py
    │   │   │   ├── manifest.py
    │   │   │   ├── parsers.py
    │   │   │   ├── perturb.py
    │   │   │   ├── segment.py
    │   │   │   └── spectr_augment.py
    │   ├── common
    │   │   ├── __init__.py
    │   │   ├── callbacks
    │   │   │   ├── __init__.py
    │   │   │   └── callbacks.py
    │   │   ├── losses
    │   │   │   ├── __init__.py
    │   │   │   ├── aggregator.py
    │   │   │   ├── cross_entropy.py
    │   │   │   ├── mse_loss.py
    │   │   │   ├── smoothed_cross_entropy.py
    │   │   │   └── spanning_loss.py
    │   │   ├── metrics
    │   │   │   ├── __init__.py
    │   │   │   └── classification_accuracy.py
    │   │   ├── parts
    │   │   │   ├── __init__.py
    │   │   │   ├── multi_layer_perceptron.py
    │   │   │   ├── transformer_utils.py
    │   │   │   └── utils.py
    │   │   └── tokenizers
    │   │   │   ├── __init__.py
    │   │   │   ├── char_tokenizer.py
    │   │   │   ├── huggingface
    │   │   │       ├── __init__.py
    │   │   │       └── auto_tokenizer.py
    │   │   │   ├── sentencepiece_tokenizer.py
    │   │   │   ├── tokenizer_spec.py
    │   │   │   └── word_tokenizer.py
    │   ├── cv
    │   │   ├── __init__.py
    │   │   ├── datasets
    │   │   │   ├── __init__.py
    │   │   │   └── mnist_dataset.py
    │   │   ├── losses
    │   │   │   ├── __init__.py
    │   │   │   └── nll_loss.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   └── mnist_lenet5.py
    │   │   └── modules
    │   │   │   ├── __init__.py
    │   │   │   └── lenet5.py
    │   ├── nlp
    │   │   ├── __init__.py
    │   │   ├── data
    │   │   │   ├── __init__.py
    │   │   │   ├── data_utils
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── data_preprocessing.py
    │   │   │   ├── glue_benchmark
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── data_processors.py
    │   │   │   │   └── glue_benchmark_dataset.py
    │   │   │   ├── intent_slot_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── intent_slot_classification_dataset.py
    │   │   │   │   └── intent_slot_classification_descriptor.py
    │   │   │   ├── language_modeling
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── l2r_lm_dataset.py
    │   │   │   │   └── lm_bert_dataset.py
    │   │   │   ├── question_answering_squad
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── qa_dataset.py
    │   │   │   │   └── qa_squad_processing.py
    │   │   │   ├── text_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── text_classification_dataset.py
    │   │   │   └── token_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── punctuation_capitalization_dataset.py
    │   │   │   │   ├── token_classification_dataset.py
    │   │   │   │   └── token_classification_descriptor.py
    │   │   ├── metrics
    │   │   │   ├── __init__.py
    │   │   │   ├── classification_report.py
    │   │   │   └── perplexity.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── glue_benchmark
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── glue_benchmark_model.py
    │   │   │   │   └── metrics_for_glue.py
    │   │   │   ├── intent_slot_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── intent_slot_classification_model.py
    │   │   │   ├── language_modeling
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── bert_lm_model.py
    │   │   │   │   └── transformer_lm_model.py
    │   │   │   ├── question_answering
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── qa_model.py
    │   │   │   ├── text_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── text_classification_model.py
    │   │   │   └── token_classification
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── punctuation_capitalization_model.py
    │   │   │   │   └── token_classification_model.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── common
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── bert_module.py
    │   │   │   │   ├── classifier.py
    │   │   │   │   ├── huggingface
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── albert.py
    │   │   │   │       ├── auto.py
    │   │   │   │       ├── bert.py
    │   │   │   │       ├── distilbert.py
    │   │   │   │       ├── huggingface_utils.py
    │   │   │   │       └── roberta.py
    │   │   │   │   ├── lm_utils.py
    │   │   │   │   ├── megatron
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── megatron_bert.py
    │   │   │   │       └── megatron_utils.py
    │   │   │   │   ├── sequence_classifier.py
    │   │   │   │   ├── sequence_regression.py
    │   │   │   │   ├── sequence_token_classifier.py
    │   │   │   │   ├── token_classifier.py
    │   │   │   │   ├── tokenizer_utils.py
    │   │   │   │   └── transformer
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── transformer_decoders.py
    │   │   │   │       ├── transformer_encoders.py
    │   │   │   │       ├── transformer_generators.py
    │   │   │   │       └── transformer_modules.py
    │   │   └── parts
    │   │   │   ├── __init__.py
    │   │   │   └── utils_funcs.py
    │   └── tts
    │   │   ├── __init__.py
    │   │   ├── data
    │   │       ├── __init__.py
    │   │       └── datalayers.py
    │   │   ├── helpers
    │   │       ├── __init__.py
    │   │       └── helpers.py
    │   │   ├── losses
    │   │       ├── __init__.py
    │   │       ├── glow_tts_loss.py
    │   │       ├── tacotron2loss.py
    │   │       └── waveglowloss.py
    │   │   ├── models
    │   │       ├── __init__.py
    │   │       ├── base.py
    │   │       ├── glow_tts.py
    │   │       ├── squeezewave.py
    │   │       ├── tacotron2.py
    │   │       └── waveglow.py
    │   │   └── modules
    │   │       ├── __init__.py
    │   │       ├── denoiser.py
    │   │       ├── glow_tts.py
    │   │       ├── glow_tts_parser.py
    │   │       ├── glow_tts_submodules.py
    │   │       ├── squeezewave.py
    │   │       ├── squeezewave_submodules.py
    │   │       ├── submodules.py
    │   │       ├── tacotron2.py
    │   │       └── waveglow.py
    ├── constants.py
    ├── core
    │   ├── __init__.py
    │   ├── classes
    │   │   ├── __init__.py
    │   │   ├── common.py
    │   │   ├── dataset.py
    │   │   ├── exportable.py
    │   │   ├── loss.py
    │   │   ├── modelPT.py
    │   │   └── module.py
    │   ├── config
    │   │   ├── __init__.py
    │   │   ├── base_config.py
    │   │   ├── optimizers.py
    │   │   ├── pytorch.py
    │   │   ├── pytorch_lightning.py
    │   │   ├── schedulers.py
    │   │   └── set_config.py
    │   ├── neural_types
    │   │   ├── __init__.py
    │   │   ├── axes.py
    │   │   ├── comparison.py
    │   │   ├── elements.py
    │   │   └── neural_type.py
    │   └── optim
    │   │   ├── __init__.py
    │   │   ├── lr_scheduler.py
    │   │   ├── novograd.py
    │   │   └── optimizers.py
    ├── package_info.py
    └── utils
    │   ├── __init__.py
    │   ├── arguments.py
    │   ├── cloud.py
    │   ├── decorators
    │       ├── __init__.py
    │       ├── deprecated.py
    │       ├── experimental.py
    │       └── port_docs.py
    │   ├── env_var_parsing.py
    │   ├── exceptions.py
    │   ├── exp_manager.py
    │   ├── export_utils.py
    │   ├── formatters
    │       ├── __init__.py
    │       ├── base.py
    │       ├── colors.py
    │       └── utils.py
    │   ├── get_rank.py
    │   ├── lightning_logger_patch.py
    │   ├── metaclasses.py
    │   ├── model_utils.py
    │   └── nemo_logging.py
├── reinstall.sh
├── requirements
    ├── requirements.txt
    ├── requirements_asr.txt
    ├── requirements_cv.txt
    ├── requirements_docs.txt
    ├── requirements_nlp.txt
    ├── requirements_simple_gan.txt
    ├── requirements_test.txt
    └── requirements_tts.txt
├── scripts
    ├── asr_checkpoint_port.py
    ├── convasr_to_onnx.py
    ├── convert_to_tarred_audio_dataset.py
    ├── freesound_download_resample
    │   ├── download_resample_freesound.sh
    │   ├── freesound_download.py
    │   ├── freesound_requirements.txt
    │   └── freesound_resample.py
    ├── get_librispeech_data.py
    ├── get_openslr_rir.py
    ├── process_an4_data.py
    ├── process_asr_text_tokenizer.py
    ├── process_speech_commands_data.py
    ├── process_vad_data.py
    └── scp_to_manifest.py
├── setup.cfg
├── setup.py
├── tests
    ├── collections
    │   ├── asr
    │   │   ├── test_asr_classification_model.py
    │   │   ├── test_asr_ctc_encoder_model_bpe.py
    │   │   ├── test_asr_ctcencdec_model.py
    │   │   ├── test_asr_datasets.py
    │   │   ├── test_asr_exportables.py
    │   │   ├── test_asr_metrics.py
    │   │   ├── test_asr_modules.py
    │   │   └── test_speaker_label_models.py
    │   ├── common
    │   │   ├── test_metrics.py
    │   │   └── test_spc_tokenizer.py
    │   ├── nlp
    │   │   ├── test_classification_report.py
    │   │   ├── test_huggingface.py
    │   │   ├── test_megatron.py
    │   │   └── test_nlp_exportables.py
    │   └── tts
    │   │   └── test_waveglow.py
    ├── conftest.py
    ├── core
    │   ├── test_exp_manager.py
    │   ├── test_fileio.py
    │   ├── test_neural_types.py
    │   ├── test_optimizers_schedulers.py
    │   ├── test_serialization.py
    │   └── test_typecheck.py
    ├── manualtest_model_downloads.py
    └── test_data_dir.py
├── tools
    └── speech_data_explorer
    │   ├── README.md
    │   ├── data_explorer.py
    │   ├── requirements.txt
    │   └── screenshot.png
└── tutorials
    ├── 00_NeMo_Primer.ipynb
    ├── 01_NeMo_Models.ipynb
    ├── NeMo_voice_swap_app.ipynb
    ├── asr
        ├── 01_ASR_with_NeMo.ipynb
        ├── 02_Online_ASR_Microphone_Demo.ipynb
        ├── 03_Speech_Commands.ipynb
        ├── 05_Online_Noise_Augmentation.ipynb
        ├── 06_Voice_Activiy_Detection.ipynb
        └── 07_Online_Offline_Microphone_VAD_Demo.ipynb
    ├── nlp
        ├── 01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb
        ├── 02_NLP_Tokenizers.ipynb
        ├── GLUE_Benchmark.ipynb
        ├── Intent_and_Slot_Classification.ipynb
        ├── Punctuation_and_Capitalization.ipynb
        ├── Question_Answering_Squad.ipynb
        ├── Relation_Extraction-BioMegatron.ipynb
        ├── Text_Classification_Sentiment_Analysis.ipynb
        ├── Token_Classification-BioMegatron.ipynb
        └── Token_Classification_Named_Entity_Recognition.ipynb
    ├── speaker_recognition
        └── Speaker_Recognition_Verification.ipynb
    └── tts
        └── 1_TTS_inference.ipynb


/.dockerignore:
--------------------------------------------------------------------------------
 1 | __pycache__
 2 | *.pyc
 3 | *.pyo
 4 | *.pyd
 5 | .Python
 6 | env
 7 | pip-log.txt
 8 | pip-delete-this-directory.txt
 9 | .tox
10 | .coverage
11 | .coverage.*
12 | .cache
13 | nosetests.xml
14 | coverage.xml
15 | *,cover
16 | *.log
17 | .git
18 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | 
12 | A clear and concise description of what the bug is.
13 | 
14 | **Steps/Code to reproduce bug**
15 | 
16 | Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
17 | 
 18 | A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports. 
19 | 
20 | 
21 | **Expected behavior**
22 | 
23 | A clear and concise description of what you expected to happen.
24 | 
25 | **Environment overview (please complete the following information)**
26 | 
 27 |  - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)]
28 |  - Method of NeMo install: [pip install or from source]. Please specify exact commands you used to install.
29 |  - If method of install is [Docker], provide `docker pull` & `docker run` commands used
30 | 
31 | **Environment details**
32 | 
33 | If NVIDIA docker image is used you don't need to specify these.
34 | Otherwise, please provide:
35 | - OS version
36 | - PyTorch version
37 | - Python version
38 | 
39 | **Additional context**
40 | 
41 | Add any other context about the problem here.
42 | Example: GPU model
43 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: feature request
 6 | assignees: okuchaiev
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | 
12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
13 | 
14 | **Describe the solution you'd like**
15 | 
16 | A clear and concise description of what you want to happen.
17 | Provide a code snippet on how new APIs/changes would be used by others.
18 | 
19 | **Describe alternatives you've considered**
20 | 
21 | A clear and concise description of any alternative solutions or features you've considered.
22 | 
23 | **Additional context**
24 | 
25 | Add any other context or screenshots about the feature request here.
26 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Question
 3 | about: Post a question about using NeMo
 4 | title: "[Question]"
 5 | labels: question
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe your question**
11 | 
12 | A clear and concise description of your question.
13 | Describe what you want to achieve. And/or what NeMo APIs are unclear/confusing.
14 | 
15 | 
16 | **Environment overview (please complete the following information)**
17 | 
 18 |  - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)]
19 |  - Method of NeMo install: [pip install or from source]. Please specify exact commands you used to install.
20 |  - If method of install is [Docker], provide `docker pull` & `docker run` commands used
21 | 
22 | **Environment details**
23 | 
24 | If NVIDIA docker image is used you don't need to specify these.
25 | Otherwise, please provide:
26 | - OS version
27 | - PyTorch version
28 | - Python version
29 | 
30 | **Additional context**
31 | 
32 | Add any other context about the problem here.
33 | Example: GPU model
34 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # =============================================================================
 2 | # Copyright (c) 2020 NVIDIA. All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # =============================================================================
16 | 
17 | # Read the Docs configuration file
18 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
19 | 
20 | # Required field.
21 | version: 2
22 | 
23 | # Build documentation in the docs/ directory with Sphinx.
24 | sphinx:
25 |   configuration: docs/source/conf.py
26 | 
27 | # Set the version of Python and requirements required to build your docs
28 | python:
29 |   version: 3.7
30 |   install:
31 |     - requirements: requirements/requirements_docs.txt
32 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax=docker/dockerfile:experimental
 2 | 
 3 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:20.08-py3
18 | 
19 | 
20 | # build an image that includes only the nemo dependencies, ensures that dependencies
21 | # are included first for optimal caching, and useful for building a development
22 | # image (by specifying build target as `nemo-deps`)
23 | FROM ${BASE_IMAGE} as nemo-deps
24 | 
25 | # Ensure apt-get won't prompt for selecting options
26 | ENV DEBIAN_FRONTEND=noninteractive
27 | RUN apt-get update && \
28 |     apt-get install -y \
29 |     libsndfile1 sox \
30 |     python-setuptools \
31 |     python-dev ffmpeg && \
32 |     rm -rf /var/lib/apt/lists/*
33 | 
34 | # build torchaudio (change latest release version to match pytorch)
35 | WORKDIR /tmp/torchaudio_build
36 | RUN git clone --depth 1 --branch release/0.6 https://github.com/pytorch/audio.git && \
37 |     cd audio && \
38 |     BUILD_SOX=1 python setup.py install && \
39 |     cd .. && rm -r audio
40 | 
41 | # install nemo dependencies (COPY puts the *contents* of requirements/ into WORKDIR, so glob requirements*.txt here)
42 | WORKDIR /tmp/nemo
43 | COPY requirements .
44 | RUN for f in $(ls requirements*.txt); do pip install --disable-pip-version-check --no-cache-dir -r $f; done
45 | 
46 | # copy nemo source into a scratch image
47 | FROM scratch as nemo-src
48 | COPY . .
49 | 
50 | # start building the final container (BASE_IMAGE is re-declared below: an ARG set before the first FROM is not visible inside a stage)
51 | FROM nemo-deps as nemo
52 | ARG NEMO_VERSION=1.0.0b1
53 | ARG BASE_IMAGE
54 | # Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container
55 | # version information as runtime environment variable for introspection purposes
56 | RUN /usr/bin/test -n "$NEMO_VERSION" && \
57 |     /bin/echo "export NEMO_VERSION=${NEMO_VERSION}" >> /root/.bashrc && \
58 |     /bin/echo "export BASE_IMAGE=${BASE_IMAGE}" >> /root/.bashrc
59 | RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"
60 | 
61 | # copy scripts/examples/tests into container for end user
62 | WORKDIR /workspace/nemo
63 | COPY scripts /workspace/nemo/scripts
64 | COPY examples /workspace/nemo/examples
65 | COPY tests /workspace/nemo/tests
66 | # COPY README.rst LICENSE /workspace/nemo/
67 | 
68 | RUN printf "#!/bin/bash\njupyter lab --no-browser --allow-root --ip=0.0.0.0" >> start-jupyter.sh && \
69 |     chmod +x start-jupyter.sh
70 | 
71 | 


--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/.nojekyll


--------------------------------------------------------------------------------
/docs/source/api-docs/nemo.rst:
--------------------------------------------------------------------------------
 1 | NeMo Core API
 2 | =============
 3 | 
 4 | Classes and Interfaces
 5 | ----------------------
 6 | 
 7 | .. autoclass:: nemo.core.ModelPT
 8 |     :show-inheritance:
 9 |     :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
10 | 
11 | 
12 | Neural Types
13 | ------------
14 | 
15 | .. automodule:: nemo.core.neural_types.neural_type
16 |     :members:
17 |     :undoc-members:
18 |     :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/asr/api.rst:
--------------------------------------------------------------------------------
 1 | NeMo ASR collection API
 2 | =======================
 3 | 
 4 | 
 5 | Model Classes
 6 | -------------
 7 | 
 8 | .. autoclass:: nemo.collections.asr.models.EncDecCTCModel
 9 |     :show-inheritance:
10 |     :members: transcribe, change_vocabulary, setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
11 | 
12 | 
13 | .. autoclass:: nemo.collections.asr.models.EncDecClassificationModel
14 |     :show-inheritance:
15 |     :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
16 | 
17 | 
18 | .. autoclass:: nemo.collections.asr.models.EncDecSpeakerLabelModel
19 |     :show-inheritance:
20 |     :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
21 | 
22 | 
23 | 
24 | Modules
25 | -------
26 | 
27 | .. autoclass:: nemo.collections.asr.modules.ConvASREncoder
28 |     :show-inheritance:
29 |     :members:
30 | 
31 | .. autoclass:: nemo.collections.asr.modules.ConvASRDecoder
32 |     :show-inheritance:
33 |     :members:


--------------------------------------------------------------------------------
/docs/source/asr/ctc_asr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/ctc_asr.png


--------------------------------------------------------------------------------
/docs/source/asr/intro.rst:
--------------------------------------------------------------------------------
 1 | Automatic Speech Recognition (ASR)
 2 | ==================================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 8
 6 | 
 7 |    datasets
 8 |    models
 9 |    api
10 | 
11 | 
12 | Speech recognition tutorials can be found under ``<NeMo_Git_root>/tutorials/asr/``
13 | 


--------------------------------------------------------------------------------
/docs/source/asr/jasper_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/jasper_layers.png


--------------------------------------------------------------------------------
/docs/source/asr/jasper_vertical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/jasper_vertical.png


--------------------------------------------------------------------------------
/docs/source/asr/models.rst:
--------------------------------------------------------------------------------
 1 | Models
 2 | ======
 3 | 
 4 | Currently, NeMo's ASR collection supports the following models:
 5 | 
 6 | .. _Jasper_model:
 7 | 
 8 | Jasper
 9 | ------
10 | 
11 | Jasper ("Just Another SPeech Recognizer") :cite:`asr-models-li2019jasper` is a deep time delay neural network (TDNN) composed of blocks of 1D-convolutional layers.
12 | The Jasper family of models is denoted as Jasper_[BxR], where B is the number of blocks and R is the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D convolution, batch normalization, ReLU, and dropout:
13 | 
14 |     .. image:: jasper_vertical.png
15 |         :align: center
16 |         :alt: jasper model
17 | 
18 | 
19 | QuartzNet
20 | ---------
21 | 
22 | QuartzNet :cite:`asr-models-kriman2019quartznet` is a version of the Jasper :cite:`asr-models-li2019jasper` model with separable convolutions and larger filters. It can achieve performance
23 | similar to Jasper but with an order of magnitude fewer parameters.
24 | Similarly to Jasper, the QuartzNet family of models is denoted as QuartzNet_[BxR], where B is the number of blocks and R is the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D *separable* convolution, batch normalization, ReLU, and dropout:
25 | 
26 |     .. image:: quartz_vertical.png
27 |         :align: center
28 |         :alt: quartznet model
29 | 
30 | 
31 | Jasper and QuartzNet models can be instantiated using :class:`EncDecCTCModel<nemo.collections.asr.models.EncDecCTCModel>` class.
32 | 
33 | 
34 | 
35 | References
36 | ----------
37 | 
38 | .. bibliography:: asr_all.bib
39 |     :style: plain
40 |     :labelprefix: ASR-MODELS
41 |     :keyprefix: asr-models-
42 | 


--------------------------------------------------------------------------------
/docs/source/asr/quartz_vertical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/quartz_vertical.png


--------------------------------------------------------------------------------
/docs/source/common/intro.rst:
--------------------------------------------------------------------------------
 1 | Common Collection
 2 | =================
 3 | 
 4 | The common collection contains things that could be used across all collections.
 5 | 
 6 | Tokenizers
 7 | ----------
 8 | .. automodule:: nemo.collections.common.tokenizers.AutoTokenizer
 9 |     :special-members: __init__
10 | .. automodule:: nemo.collections.common.tokenizers.SentencePieceTokenizer
11 |     :special-members: __init__
12 | .. automodule:: nemo.collections.common.tokenizers.TokenizerSpec
13 |     :special-members: __init__
14 | 
15 | 
16 | Losses
17 | ------
18 | .. automodule:: nemo.collections.common.losses.AggregatorLoss
19 |     :special-members: __init__
20 | 
21 | .. automodule:: nemo.collections.common.losses.CrossEntropyLoss
22 |     :special-members: __init__
23 | 
24 | .. automodule:: nemo.collections.common.losses.MSELoss
25 |     :special-members: __init__
26 | 
27 | .. automodule:: nemo.collections.common.losses.SmoothedCrossEntropyLoss
28 |     :special-members: __init__
29 | .. automodule:: nemo.collections.common.losses.SpanningLoss
30 |     :special-members: __init__
31 | 
32 | 


--------------------------------------------------------------------------------
/docs/source/core.rst:
--------------------------------------------------------------------------------
 1 | Core Concepts
 2 | =============
 3 | 
 4 | Neural Module
 5 | ~~~~~~~~~~~~~
 6 | Neural Modules are building blocks for Models.
 7 | They accept (typed) inputs and return (typed) outputs. *All Neural Modules inherit from* ``torch.nn.Module`` *and are, therefore, compatible with the PyTorch ecosystem.* There are 3 types of Neural Modules:
 8 | 
 9 |     * Regular modules
10 |     * Dataset/IterableDataset
11 |     * Losses
12 | 
13 | Model
14 | ~~~~~
15 | A NeMo Model is an entity that contains all of the information necessary to invoke training/fine-tuning.
16 | It is based on PyTorch Lightning's LightningModule and as such contains information on:
17 | 
18 |     * Neural Network architecture, including necessary pre- and post- processing
19 |     * How data is handled for training/validation/testing
20 |     * Optimization, learning rate schedules, scaling, etc.
21 | 
22 | Neural Types
23 | ~~~~~~~~~~~~
24 | 
25 | Neural Types perform semantic checks on the inputs and outputs of modules and models. They contain information about:
26 | 
27 |     * Semantics of what is stored in the tensors. For example, logits, logprobs, audiosignal, embeddings, etc.
28 |     * Axes layout, semantics and (optionally) dimensionality. For example: [Batch, Time, Channel]


--------------------------------------------------------------------------------
/docs/source/cv/intro.rst:
--------------------------------------------------------------------------------
 1 | Computer Vision (CV)
 2 | ===============================
 3 | 
 4 | The collection contains several datasets, modules and losses useful in computer/machine vision tasks.
 5 | 
 6 | Models
 7 | ------
 8 | .. automodule:: nemo.collections.cv.models.mnist_lenet5
 9 | 
10 | Datasets
11 | ----------
12 | .. automodule:: nemo.collections.cv.datasets.mnist_dataset
13 |     :special-members: __init__, __len__, __getitem__
14 | 
15 | Neural Modules
16 | --------------
17 | .. automodule:: nemo.collections.cv.modules.lenet5
18 |     :special-members: __init__
19 | 
20 | Losses
21 | ------
22 | .. automodule:: nemo.collections.cv.losses.nll_loss
23 |     :special-members: __init__
24 | 
25 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | NVIDIA NeMo Developer Guide
 2 | ===========================
 3 | 
 4 | .. toctree::
 5 |    :hidden:
 6 |    :maxdepth: 8
 7 | 
 8 |    Introduction <self>
 9 |    core
10 |    asr/intro
11 |    cv/intro
12 |    nlp/intro
13 |    tts/intro
14 |    common/intro
15 |    api-docs/nemo
16 | 
17 | 
18 | NeMo is a library for easy training, building and manipulating of AI models.
19 | NeMo's current focus is providing a great experience for Conversational AI.
20 | 
21 | NeMo models can be trained on multi-GPU and multi-node, with or without Mixed Precision.
22 | Many models in NeMo come with high-quality pre-trained checkpoints.
23 | 
24 | Requirements
25 | ------------
26 | 
27 | NeMo's main requirements are:
28 | 
29 | 1) Python 3.6 or 3.7
30 | 2) PyTorch 1.6 or above
31 | 
32 | Installation
33 | ~~~~~~~~~~~~
34 | ``pip install nemo_toolkit[all]==version``
35 | 
36 | We recommend using NVIDIA's PyTorch container:
37 | 
38 | .. code-block:: bash
39 | 
40 |     docker run --gpus all -it --rm -v <nemo_github_folder>:/NeMo --shm-size=8g \
41 |     -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \
42 |     stack=67108864 nvcr.io/nvidia/pytorch:20.06-py3
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/docs/source/nlp/api.rst:
--------------------------------------------------------------------------------
 1 | NeMo NLP collection API
 2 | =======================
 3 | 
 4 | 
 5 | Model Classes
 6 | -------------
 7 | 
 8 | .. autoclass:: nemo.collections.nlp.models.GLUEModel
 9 |     :show-inheritance:
10 |     :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
11 | 
12 | .. autoclass:: nemo.collections.nlp.models.PunctuationCapitalizationModel
13 |     :show-inheritance:
14 |     :members: add_punctuation_capitalization, setup_training_data, setup_optimization, setup_validation_data, setup_test_data, multi_validation_epoch_end, register_artifact
15 | 
16 | .. autoclass:: nemo.collections.nlp.models.TokenClassificationModel
17 |     :show-inheritance:
18 |     :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact
19 | 
20 | 
21 | Modules
22 | -------
23 | 
24 | .. autoclass:: nemo.collections.nlp.modules.BertModule
25 |     :show-inheritance:
26 |     :members:
27 | 
28 | .. autoclass:: nemo.collections.nlp.modules.MegatronBertEncoder
29 |     :show-inheritance:
30 |     :members:
31 | 
32 | .. autoclass:: nemo.collections.nlp.modules.AlbertEncoder
33 |     :show-inheritance:
34 |     :members:
35 | 
36 | .. autoclass:: nemo.collections.nlp.modules.BertEncoder
37 |     :show-inheritance:
38 |     :members:
39 | 
40 | .. autoclass:: nemo.collections.nlp.modules.DistilBertEncoder
41 |     :show-inheritance:
42 |     :members:
43 | 
44 | .. autoclass:: nemo.collections.nlp.modules.RobertaEncoder
45 |     :show-inheritance:
46 |     :members:
47 | 
48 | .. autoclass:: nemo.collections.nlp.modules.SequenceClassifier
49 |     :show-inheritance:
50 |     :members:
51 | 
52 | .. autoclass:: nemo.collections.nlp.modules.SequenceRegression
53 |     :show-inheritance:
54 |     :members:
55 | 
56 | .. autoclass:: nemo.collections.nlp.modules.SequenceTokenClassifier
57 |     :show-inheritance:
58 |     :members:
59 | 
60 | .. autofunction::  nemo.collections.nlp.modules.get_pretrained_lm_model
61 | 
62 | .. autofunction::  nemo.collections.nlp.modules.get_pretrained_lm_models_list


--------------------------------------------------------------------------------
/docs/source/nlp/intro.rst:
--------------------------------------------------------------------------------
 1 | Natural Language Processing (NLP)
 2 | =================================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 8
 6 | 
 7 |    models
 8 |    api
 9 | 
10 | 
11 | NLP tutorials can be found under ``<NeMo_Git_root>/tutorials/nlp/``
12 | 


--------------------------------------------------------------------------------
/docs/source/nlp/models.rst:
--------------------------------------------------------------------------------
 1 | Models
 2 | ======
 3 | 
 4 | NeMo's NLP collection supports the following models:
 5 | 
 6 | * BERT pretraining
 7 | * GLUE Benchmark
 8 | * Joint Intent and Slot Classification
 9 | * Text Classification
10 | * Named Entity Recognition (NER)
11 | * Punctuation and Capitalization
12 | * Question Answering
13 | 
14 | Scripts for running these models can be found under ``NeMo/examples/nlp/``.
15 | NLP tutorials are located under ``NeMo/tutorials/nlp/``.


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Copyright (c) 2011-2019 Ryan Roemer
16 | #
17 | # Permission is hereby granted, free of charge, to any person obtaining a copy
18 | # of this software and associated documentation files (the "Software"), to deal
19 | # in the Software without restriction, including without limitation the rights
20 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
21 | # copies of the Software, and to permit persons to whom the Software is
22 | # furnished to do so, subject to the following conditions:
23 | #
24 | # The above copyright notice and this permission notice shall be included in
25 | # all copies or substantial portions of the Software.
26 | #
27 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
33 | # THE SOFTWARE.
34 | 
35 | """
36 | NVIDIA developer guide theme.
37 | 
38 | Based on https://github.com/ryan-roemer/sphinx-bootstrap-theme.
39 | """
40 | from os import path
41 | 
42 | __version__ = '0.1.0a1'
43 | __version_full__ = __version__
44 | 
45 | 
46 | def get_html_theme_path():
47 |     """Return list of HTML theme paths."""
48 |     cur_dir = path.abspath(path.dirname(path.dirname(__file__)))
49 |     return cur_dir
50 | 
51 | 
52 | # See http://www.sphinx-doc.org/en/stable/theming.html#distribute-your-theme-as-a-python-package
53 | def setup(app):
54 |     app.add_html_theme('sphinx_nvidia_theme', path.abspath(path.dirname(__file__)))
55 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/footer.html:
--------------------------------------------------------------------------------
 1 | <footer>
 2 |   {% if (theme_prev_next_buttons_location == 'bottom' or theme_prev_next_buttons_location == 'both') and (next or prev) %}
 3 |     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
 4 |       {% if next %}
 5 |         <a href="{{ next.link|e }}" class="btn btn-neutral float-right" title="{{ next.title|striptags|e }}" accesskey="n" rel="next">{{ _('Next') }} <span class="fa fa-arrow-circle-right"></span></a>
 6 |       {% endif %}
 7 |       {% if prev %}
 8 |         <a href="{{ prev.link|e }}" class="btn btn-neutral float-left" title="{{ prev.title|striptags|e }}" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> {{ _('Previous') }}</a>
 9 |       {% endif %}
10 |     </div>
11 |   {% endif %}
12 | 
13 |   <hr/>
14 | 
15 |   <div role="contentinfo">
16 |     <p>
17 |     {%- if show_copyright %}
18 |       {%- if hasdoc('copyright') %}
19 |         {% trans path=pathto('copyright'), copyright=copyright|e %}&copy; <a href="{{ path }}">Copyright</a> {{ copyright }}{% endtrans %}
20 |       {%- else %}
21 |         {% trans copyright=copyright|e %}&copy; Copyright {{ copyright }}{% endtrans %}
22 |       {%- endif %}
23 |     {%- endif %}
24 | 
25 |     {%- if build_id and build_url %}
26 |       {% trans build_url=build_url, build_id=build_id %}
27 |         <span class="build">
28 |           Build
29 |           <a href="{{ build_url }}">{{ build_id }}</a>.
30 |         </span>
31 |       {% endtrans %}
32 |     {%- elif commit %}
33 |       {% trans commit=commit %}
34 |         <span class="commit">
35 |           Revision <code>{{ commit }}</code>.
36 |         </span>
37 |       {% endtrans %}
38 |     {%- elif last_updated %}
39 |       <span class="lastupdated">
40 |         {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
41 |       </span>
42 |     {%- endif %}
43 | 
44 |     </p>
45 |   </div>
46 | 
47 |   {%- if show_sphinx %}
48 |   {% trans %}Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>{% endtrans %}.
49 |   {%- endif %}
50 | 
51 |   {%- block extrafooter %} {% endblock %}
52 | 
53 | </footer>
54 | 
55 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/layout.html:
--------------------------------------------------------------------------------
 1 | {% extends "!layout_base.html" %}
 2 | {% block extrahead %}
 3 |     <link href="{{ pathto("_static/css/nvidia_theme.css", True) }}" rel="stylesheet" type="text/css">
 4 |     {# PROJECT NAME #}
 5 |     {% if theme_project_name %}
 6 |         <style>
 7 |             .wy-nav-content::before {
 8 |                 content: "{{ theme_project_name }} Documentation";
 9 |             }
10 |         </style>
11 |     {% endif %}
12 | {% endblock %}
13 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/search.html:
--------------------------------------------------------------------------------
 1 | {#
 2 |     basic/search.html
 3 |     ~~~~~~~~~~~~~~~~~
 4 | 
 5 |     Template for the search page.
 6 | 
 7 |     :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
 8 |     :license: BSD, see LICENSE for details.
 9 | #}
10 | {%- extends "layout.html" %}
11 | {% set title = _('Search') %}
12 | {%- block scripts %}
13 |     {{ super() }}
14 |     <script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
15 | {%- endblock %}
16 | {% block footer %}
17 |   <script type="text/javascript">
18 |     jQuery(function() { Search.loadIndex("{{ pathto('searchindex.js', 1) }}"); });
19 |   </script>
20 |   {# this is used when loading the search index using $.ajax fails,
21 |      such as on Chrome for documents on localhost #}
22 |   <script type="text/javascript" id="searchindexloader"></script>
23 |   {{ super() }}
24 | {% endblock %}
25 | {% block body %}
26 |   <noscript>
27 |   <div id="fallback" class="admonition warning">
28 |     <p class="last">
29 |       {% trans trimmed %}Please activate JavaScript to enable the search
30 |       functionality.{% endtrans %}
31 |     </p>
32 |   </div>
33 |   </noscript>
34 | 
35 |   {% if search_performed %}
36 |     {# Translators: Search is a noun, not a verb #}
37 |     <h2>{{ _('Search Results') }}</h2>
38 |     {% if not search_results %}
39 |       <p>{{ _('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.') }}</p>
40 |     {% endif %}
41 |   {% endif %}
42 |   <div id="search-results">
43 |   {% if search_results %}
44 |     <ul>
45 |     {% for href, caption, context in search_results %}{# each result unpacks to href, caption, context #}
46 |       <li>
47 |         <a href="{{ pathto(href) }}">{{ caption }}</a>
48 |         <p class="context">{{ context|e }}</p>
49 |       </li>
50 |     {% endfor %}
51 |     </ul>
52 |   {% endif %}
53 |   </div>
54 | {% endblock %}
55 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/searchbox.html:
--------------------------------------------------------------------------------
 1 | {%- if builder != 'singlehtml' %}
 2 | <div role="search">
 3 |   <form id="rtd-search-form" class="wy-form" action="{{ pathto('search') }}" method="get">
 4 |     <input type="text" name="q" placeholder="{{ _('Search docs') }}" />
 5 |     <input type="hidden" name="check_keywords" value="yes" />
 6 |     <input type="hidden" name="area" value="default" />
 7 |   </form>
 8 | </div>
 9 | {%- endif %}
10 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/fonts.css:
--------------------------------------------------------------------------------
 1 | /* Fonts */
 2 | 
 3 | @font-face {
 4 |     font-family: 'lato';
 5 |     src: url('lato-regular.woff2?#iefix') format('woff2'),
 6 |          url('lato-regular.woff') format('woff'),
 7 |          url('lato-regular.ttf') format('truetype');
 8 |     font-weight: normal;
 9 |     font-style: normal;
10 | }


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.ttf


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff2


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.ttf


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff2


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.eot


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.ttf


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff2


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/images/favicon.ico


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/static/images/nvidia_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/images/nvidia_logo.png


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/theme.conf:
--------------------------------------------------------------------------------
 1 | [theme]
 2 | inherit = basic
 3 | stylesheet = css/nvidia_theme.css
 4 | pygments_style = default
 5 | 
 6 | [options]
 7 | analytics_id =
 8 | canonical_url =
 9 | collapse_navigation = True
10 | display_version = True
11 | includehidden = True
12 | logo_only =
13 | logo_path =
14 | navigation_depth = 4
15 | prev_next_buttons_location = bottom
16 | project_name =
17 | project_version =
18 | sticky_navigation = True
19 | style_external_links = False
20 | titles_only =
21 | 


--------------------------------------------------------------------------------
/docs/source/nvidia_theme/versions.html:
--------------------------------------------------------------------------------
 1 | {% if READTHEDOCS %}
 2 | {# Add rst-badge after rst-versions for small badge style. #}
 3 |   <div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
 4 |     <span class="rst-current-version" data-toggle="rst-current-version">
 5 |       <span class="fa fa-book"> Read the Docs</span>
 6 |       v: {{ current_version }}
 7 |       <span class="fa fa-caret-down"></span>
 8 |     </span>
 9 |     <div class="rst-other-versions">
10 |       <dl>
11 |         <dt>{{ _('Versions') }}</dt>
12 |         {% for slug, url in versions %}
13 |           <dd><a href="{{ url }}">{{ slug }}</a></dd>
14 |         {% endfor %}
15 |       </dl>
16 |       <dl>
17 |         <dt>{{ _('Downloads') }}</dt>
18 |         {% for type, url in downloads %}
19 |           <dd><a href="{{ url }}">{{ type }}</a></dd>
20 |         {% endfor %}
21 |       </dl>
22 |       <dl>
23 |         <dt>{{ _('On Read the Docs') }}</dt>
24 |           <dd>
25 |             <a href="//{{ PRODUCTION_DOMAIN }}/projects/{{ slug }}/?fromdocs={{ slug }}">{{ _('Project Home') }}</a>
26 |           </dd>
27 |           <dd>
28 |             <a href="//{{ PRODUCTION_DOMAIN }}/builds/{{ slug }}/?fromdocs={{ slug }}">{{ _('Builds') }}</a>
29 |           </dd>
30 |       </dl>
31 |       <hr/>
32 |       {% trans %}Free document hosting provided by <a href="http://www.readthedocs.org">Read the Docs</a>.{% endtrans %}
33 | 
34 |     </div>
35 |   </div>
36 | {% endif %}
37 | 
38 | 


--------------------------------------------------------------------------------
/docs/source/tts/intro.rst:
--------------------------------------------------------------------------------
1 | Speech Synthesis
2 | ================


--------------------------------------------------------------------------------
/docs/source/tts/models.rst:
--------------------------------------------------------------------------------
 1 | Models
 2 | ======
 3 | 
 4 | Currently, NeMo's TTS collection supports the following models:
 5 | 
 6 | .. _WaveGlow_Model:
 7 | 
 8 | WaveGlow
 9 | --------
10 | 
11 | WaveGlow :cite:`tts-models-prenger2018waveglow` is a Flow-based generative model that generates audio from mel spectrograms.
12 | Comprised of several flow steps, WaveGlow learns an invertible mapping from a simple latent space to audio waveforms.
13 | 
14 |     .. image:: waveglow.png
15 |         :align: center
16 |         :alt: waveglow model
17 | 
18 | WaveGlow can be instantiated using the :class:`WaveGlowModel<nemo.collections.tts.models.WaveGlowModel>` class.
19 | 
20 | 
21 | SqueezeWave
22 | -----------
23 | 
24 | SqueezeWave :cite:`tts-models-zhai2020squeezewave` is a version of WaveGlow :cite:`tts-models-prenger2018waveglow` that simplifies the architecture of the WaveNet (WN) module by introducing depthwise separable convolutions and removing dual channels.
25 | SqueezeWave also uses larger group sizes, which reduces computation along the temporal axis and allows for fewer upsampling layers for the mel spectrogram.
26 | 
27 |     .. image:: squeezewave_wn.png
28 |         :align: center
29 |         :alt: squeezewave vs waveglow wavenet modules
30 | 
31 | SqueezeWave can be instantiated using the :class:`SqueezeWaveModel<nemo.collections.tts.models.SqueezeWaveModel>` class.
32 | 
33 | 
34 | References
35 | ----------
36 | 
37 | .. bibliography:: tts_all.bib
38 |     :style: plain
39 |     :labelprefix: TTS-MODELS
40 |     :keyprefix: tts-models-
41 | 


--------------------------------------------------------------------------------
/docs/source/tts/squeezewave_wn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/tts/squeezewave_wn.png


--------------------------------------------------------------------------------
/docs/source/tts/waveglow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/tts/waveglow.png


--------------------------------------------------------------------------------
/docs/update_docs.sh:
--------------------------------------------------------------------------------
1 | # Rebuild the HTML docs from scratch: delete the build directory, then run the clean and html make targets.
2 | rm -rf build
3 | make clean
4 | make html


--------------------------------------------------------------------------------
/examples/cv/mnist_lenet5_image_classification_pure_lightning.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | 
17 | import pytorch_lightning as ptl
18 | from omegaconf import DictConfig
19 | 
20 | from nemo.collections.cv.models import MNISTLeNet5, MNISTLeNet5Config
21 | from nemo.core.config import Config, TrainerConfig, set_config
22 | from nemo.utils import logging
23 | 
24 | 
25 | @dataclass
26 | class AppConfig(Config):
27 |     """
28 |     This is structured config for this application.
29 | 
30 |     Args:
31 |         name: Description of the application.
32 |         trainer: configuration of the trainer.
33 |         model: configuation of the model.
34 |     """
35 | 
36 |     name: str = "Training of a LeNet-5 Model using a pure PyTorchLightning approach - using DDP on 2 GPUs."
37 |     trainer: TrainerConfig = TrainerConfig(gpus=2, distributed_backend="dp")
38 |     model: MNISTLeNet5Config = MNISTLeNet5Config()
39 | 
40 | 
41 | @set_config(config=AppConfig)
42 | def main(cfg: DictConfig):
43 |     # Show configuration - user can modify every parameter from command line!
44 |     logging.info("Application config\n" + cfg.pretty())
45 | 
46 |     # The "model" - with dataloader/dataset inside of it.
47 |     lenet5 = MNISTLeNet5(cfg.model)
48 | 
49 |     # Setup train data loader and optimizer
50 |     lenet5.setup_training_data()
51 | 
52 |     # Setup optimizer and scheduler
53 |     lenet5.setup_optimization()
54 | 
55 |     # Create trainer.
56 |     trainer = ptl.Trainer(**(cfg.trainer))
57 | 
58 |     # Train.
59 |     trainer.fit(model=lenet5)
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     main()  # TODO: No cfg in function call, and no hydra runner
64 | 


--------------------------------------------------------------------------------
/examples/nlp/glue_benchmark/glue_benchmark.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | ## Tasks
17 | This script works with all GLUE Benchmark tasks, more details about the GLUE Benchmark could be found at
18 | https://gluebenchmark.com/
19 | 
20 | More details on how to use this script could be found in tutorials/nlp/GLUE_Benchmark.ipynb
21 | 
22 | ## Model Training
23 | 
24 | To train GLUEModel with the default config file, run:
25 |     python glue_benchmark.py \
26 |     model.dataset.data_dir=<PATH_TO_DATA_DIR>  \
27 |     model.task_name=TASK_NAME \
28 |     trainer.max_epochs=<NUM_EPOCHS> \
29 |     trainer.gpus="[<CHANGE_TO_GPU_YOU_WANT_TO_USE>]"
30 | 
31 | Supported task names:
32 | ["cola", "sst-2", "mrpc", "sts-b", "qqp", "mnli", "qnli", "rte", "wnli"]
33 | Note, MNLI task includes both matched and mismatched dev sets
34 | """
35 | 
36 | import pytorch_lightning as pl
37 | from omegaconf import DictConfig
38 | 
39 | from nemo.collections.nlp.models import GLUEModel
40 | from nemo.core.config import hydra_runner
41 | from nemo.utils import logging
42 | from nemo.utils.exp_manager import exp_manager
43 | 
44 | 
@hydra_runner(config_name="glue_benchmark_config")
def main(cfg: DictConfig) -> None:
    """Train a GLUEModel on the task selected by ``cfg.model.task_name``.

    Args:
        cfg: Hydra/OmegaConf configuration. Reads ``cfg.trainer`` (keyword
            arguments for ``pl.Trainer``), the optional ``cfg.exp_manager``
            section, and ``cfg.model`` (including ``task_name`` and
            ``nemo_path``).
    """
    # Function-scope import: the module only imports DictConfig at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager_cfg = cfg.get("exp_manager", None)
    if exp_manager_cfg:
        # Name the experiment after the GLUE task being trained.
        exp_manager_cfg.name = cfg.model.task_name
        logging.info(f'Setting task_name to {exp_manager_cfg.name} in exp_manager')
    exp_manager(trainer, exp_manager_cfg)
    model = GLUEModel(cfg.model, trainer=trainer)
    trainer.fit(model)
    # Export only when a destination path was configured.
    if cfg.model.nemo_path:
        model.save_to(cfg.model.nemo_path)


if __name__ == '__main__':
    main()
62 | 


--------------------------------------------------------------------------------
/examples/nlp/intent_slot_classification/intent_slot_classification.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytorch_lightning as pl
16 | from omegaconf import DictConfig, OmegaConf
17 | 
18 | from nemo.collections.nlp.models import IntentSlotClassificationModel
19 | from nemo.core.config import hydra_runner
20 | from nemo.utils import logging
21 | from nemo.utils.exp_manager import exp_manager
22 | 
23 | 
@hydra_runner(config_path="conf", config_name="intent_slot_classification_config")
def main(cfg: DictConfig) -> None:
    """Train an IntentSlotClassificationModel and optionally save it.

    Args:
        cfg: Hydra/OmegaConf configuration providing ``trainer``, ``model``
            and, optionally, ``exp_manager`` sections.
    """
    config_yaml = OmegaConf.to_yaml(cfg)
    logging.info(f'Config Params:\n {config_yaml}')

    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager_cfg = cfg.get("exp_manager", None)
    exp_manager(pl_trainer, exp_manager_cfg)

    classifier = IntentSlotClassificationModel(cfg.model, trainer=pl_trainer)
    pl_trainer.fit(classifier)

    # Save the trained model only if a target path is configured.
    if cfg.model.nemo_path:
        classifier.save_to(cfg.model.nemo_path)


if __name__ == '__main__':
    main()
39 | 


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/bert_pretraining.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import pytorch_lightning as pl
17 | from omegaconf import DictConfig
18 | 
19 | from nemo.collections.nlp.models.language_modeling import BERTLMModel
20 | from nemo.core.config import hydra_runner
21 | from nemo.utils import logging
22 | from nemo.utils.exp_manager import exp_manager
23 | 
24 | 
@hydra_runner(config_path="conf", config_name="bert_pretraining_from_text_config")
def main(cfg: DictConfig) -> None:
    """Pretrain a BERTLMModel and optionally save it.

    Args:
        cfg: Hydra/OmegaConf configuration. Reads ``cfg.trainer`` (keyword
            arguments for ``pl.Trainer``), the optional ``cfg.exp_manager``
            section, and ``cfg.model`` (including ``nemo_path``).
    """
    # Function-scope import: the module only imports DictConfig at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Config:\n {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    bert_model = BERTLMModel(cfg.model, trainer=trainer)
    trainer.fit(bert_model)
    # Export only when a destination path was configured.
    if cfg.model.nemo_path:
        bert_model.save_to(cfg.model.nemo_path)


if __name__ == '__main__':
    main()
38 | 


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/conf/bert_pretraining_from_preprocessed_config.yaml:
--------------------------------------------------------------------------------
 1 | # BERT Pretraining from Preprocessed (tokenized) data
 2 | name: &name PretrainingBERTFromPreprocessed
 3 | trainer:
 4 |   gpus: 8 # the number of gpus, 0 for CPU, or list with gpu indices
 5 |   num_nodes: 1
 6 |   max_steps: 2285714 # precedence over max_epochs
 7 |   num_sanity_val_steps: 0 # needed for bert pretraining from preproc
 8 |   replace_sampler_ddp: false # needed for bert pretraining from preproc
 9 |   accumulate_grad_batches: 1 # accumulates grads every k batches
10 |   precision: 16 # 16 to use AMP
11 |   amp_level: O1 # O1 or O2 if using AMP
12 |   distributed_backend: ddp
13 |   gradient_clip_val: 1.0
14 |   row_log_interval: 1
15 |   val_check_interval: 1.0 # 1.0 checks once per epoch; 0.25 checks 4 times per epoch
16 |   checkpoint_callback: false # provided by exp_manager
17 |   logger: false # provided by exp_manager
18 | 
19 | model:
20 |   nemo_path: null # exported .nemo path
21 |   only_mlm_loss: true # only use masked language model without next sentence prediction
22 |   num_tok_classification_layers: 1 # number of token classification head output layers
23 |   num_seq_classification_layers: 2 # number of sequence classification head output layers
24 | 
25 | 
26 |   language_model:
27 |     pretrained_model_name: bert-base-uncased # huggingface model name
28 |     lm_checkpoint: null
29 |     config:
30 |       attention_probs_dropout_prob: 0.1
31 |       hidden_act: gelu
32 |       hidden_dropout_prob: 0.1
33 |       hidden_size: 768
34 |       initializer_range: 0.02
35 |       intermediate_size: 3072
36 |       max_position_embeddings: 512
37 |       num_attention_heads: 12
38 |       num_hidden_layers: 12
39 |       type_vocab_size: 2
40 |       vocab_size: 30522
41 |     config_file: null # json file, precedence over config
42 | 
43 |   tokenizer: null
44 | 
45 |   train_ds:
46 |     data_file: null # path to hdf5 file (or directory)
47 |     max_predictions_per_seq: 80
48 |     batch_size: 16
49 |     shuffle: true
50 |     num_samples: -1
51 |     num_workers: 2
52 |     drop_last: false
53 |     pin_memory: false
54 | 
55 |   optim:
56 |     name: adamw
57 |     lr: 0.4375e-4
58 |     weight_decay: 0.01
59 | 
60 |     sched:
61 |       name: SquareRootAnnealing
62 |       warmup_steps: null
63 |       warmup_ratio: 0.01
64 |       min_lr: 0.0
65 |       last_epoch: -1
66 | 
67 | 
68 | exp_manager:
69 |   exp_dir: null # where to store logs and checkpoints
70 |   name: *name # name of experiment
71 |   create_tensorboard_logger: True
72 |   create_checkpoint_callback: True
73 | 
74 | 
75 | hydra:
76 |   run:
77 |     dir: .
78 |   job_logging:
79 |     root:
80 |       handlers: null


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/conf/transformer_lm_config.yaml:
--------------------------------------------------------------------------------
 1 | # Config file for training left-to-right Transformer language model
 2 | name: &name TransformerLM
 3 | 
 4 | trainer:
 5 |   gpus: 1 # the number of gpus, 0 for CPU
 6 |   num_nodes: 1
 7 |   max_epochs: 2
 8 |   max_steps: 400 # precedence over max_epochs
 9 |   accumulate_grad_batches: 1 # accumulates grads every k batches
10 |   amp_level: O2 # O1/O2 for mixed precision
11 |   precision: 16 # Should be set to 16 for O1 and O2, default is 16 as PT ignores it when amp_level is O0
12 |   distributed_backend: ddp
13 |   checkpoint_callback: False  # Provided by exp_manager
14 |   logger: False  # Provided by exp_manager
15 |   row_log_interval: 1  # Interval of logging.
16 |   val_check_interval: 1.0  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
17 |   resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.
18 | 
19 | model:
20 | 
21 | 
22 |   language_model:
23 |     tokenizer: word
24 |     special_tokens:
25 |         unk_token: '<UNK>'
26 |         pad_token: '<PAD>'
27 |         bos_token: '<BOS>'
28 |         eos_token: '<EOS>'
29 |     vocab_file: ???
30 |     hidden_size: 512
31 |     num_layers: 6
32 |     num_attn_heads: 8
33 |     inner_size: 2048
34 |     max_seq_length: 256
35 |     embedding_dropout: 0
36 |     ffn_dropout: 0
37 |     attn_score_dropout: 0
38 |     attn_layer_dropout: 0
39 | 
40 |   dataset:
41 |     max_seq_length: 256
42 |     num_workers: 2 # number of workers for data loaders
43 |     drop_last: false # drops the last batch if it is smaller than the batch size
44 |     pin_memory: false # enables pin_memory feature of the data loaders
45 | 
46 |   train_ds:
47 |     file_name: ??? # path to file with training data
48 |     batch_size: 32
49 |     shuffle: true
50 |     num_samples: -1 # number of samples to be considered, -1 means all the dataset
51 | 
52 |   validation_ds:
53 |     file_name: ??? # path to file with validation data
54 |     batch_size: 32
55 |     shuffle: false
56 |     num_samples: -1 # number of samples to be considered, -1 means all the dataset
57 |     predict_last_k: 64
58 | 
59 |   optim:
60 |     name: adam
61 |     lr: 1e-4
62 |     betas: [0.9, 0.999]
63 |     weight_decay: 0
64 | 
65 |     sched:
66 |       name: WarmupAnnealing
67 |       warmup_steps: null
68 |       warmup_ratio: 0.05
69 |       last_epoch: -1
70 | 
71 |       # pytorch lightning args
72 |       monitor: val_loss
73 |       reduce_on_plateau: false
74 | 
75 | exp_manager:
76 |   exp_dir: null  # where to store logs and checkpoints
77 |   name: *name  # name of experiment
78 |   create_tensorboard_logger: True
79 |   create_checkpoint_callback: True
80 | 
81 | hydra:
82 |   run:
83 |     dir: .
84 |   job_logging:
85 |     root:
86 |       handlers: null
87 | 


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/convert_weights_to_nemo1.0.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | Converts BERT NeMo0.* checkpoints to NeMo1.0 format.
17 | """
18 | 
19 | from argparse import ArgumentParser
20 | 
21 | import torch
22 | 
# Command-line interface: the encoder and token-classifier checkpoints are
# required; the sequence-classifier checkpoint and the output path are optional.
parser = ArgumentParser()
parser.add_argument("--bert_encoder", required=True, help="path to BERT encoder, e.g. /../BERT-STEP-2285714.pt")
parser.add_argument(
    "--bert_token_classifier",
    required=True,
    help="path to BERT token classifier, e.g. /../BertTokenClassifier-STEP-2285714.pt",
)
parser.add_argument(
    "--bert_sequence_classifier",
    required=False,
    default=None,
    help="path to BERT sequence classifier, e.g /../SequenceClassifier-STEP-2285714.pt",
)
parser.add_argument(
    "--output_path", required=False, default="converted_model.pt", help="output path to newly converted model"
)
args = parser.parse_args()

# Load everything onto the CPU so the conversion also works on machines that
# do not have the GPU device(s) the checkpoints were originally saved from.
bert_in = torch.load(args.bert_encoder, map_location="cpu")
tok_in = torch.load(args.bert_token_classifier, map_location="cpu")
if args.bert_sequence_classifier:
    seq_in = torch.load(args.bert_sequence_classifier, map_location="cpu")

# The converted checkpoint is a dict with a single "state_dict" entry that
# collects the renamed entries of all input checkpoints.
new_dict = {}
new_model = {"state_dict": new_dict}

# Encoder entries: every occurrence of "bert." in a key becomes "bert_model.".
for k in bert_in:
    new_name = k.replace("bert.", "bert_model.")
    new_dict[new_name] = bert_in[k]

# Token-classifier entries are stored under the "mlm_classifier." prefix.
for k in tok_in:
    new_name = "mlm_classifier." + k
    new_dict[new_name] = tok_in[k]

# Sequence-classifier entries (if provided) go under "nsp_classifier.".
if args.bert_sequence_classifier:
    for k in seq_in:
        new_name = "nsp_classifier." + k
        new_dict[new_name] = seq_in[k]

torch.save(new_model, args.output_path)
62 | 


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/get_wkt2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | """
 4 | This file is adapted from
 5 | https://github.com/salesforce/awd-lstm-lm/blob/master/getdata.sh
 6 | Copyright by the AWD LSTM authors.
 7 | """
 8 | DATA_DIR=$1
 9 | echo "- Downloading WikiText-2"
10 | 
11 | wget --continue -P $DATA_DIR https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
12 | unzip -q $DATA_DIR/wikitext-2-v1.zip -d $DATA_DIR
13 | cd $DATA_DIR/wikitext-2
14 | mv wiki.train.tokens train.txt
15 | sed -i -e "s/<unk>/[UNK]/g" train.txt
16 | mv wiki.valid.tokens valid.txt
17 | sed -i -e "s/<unk>/[UNK]/g" valid.txt
18 | mv wiki.test.tokens test.txt
19 | sed -i -e "s/<unk>/[UNK]/g" test.txt
20 | cd ..
21 | rm wikitext-2-v1.zip
22 | 
23 | echo "- WikiText-2 saved at $DATA_DIR/wikitext-2"
24 | 


--------------------------------------------------------------------------------
/examples/nlp/language_modeling/transformer_lm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import pytorch_lightning as pl
17 | from omegaconf import DictConfig
18 | 
19 | from nemo.collections.nlp.models.language_modeling import TransformerLMModel
20 | from nemo.core.config import hydra_runner
21 | from nemo.utils import logging
22 | from nemo.utils.exp_manager import exp_manager
23 | 
24 | 
@hydra_runner(config_path="conf", config_name="transformer_lm_config")
def main(cfg: DictConfig) -> None:
    """Train a TransformerLMModel.

    Args:
        cfg: Hydra/OmegaConf configuration. Reads ``cfg.trainer`` (keyword
            arguments for ``pl.Trainer``), the optional ``cfg.exp_manager``
            section, and ``cfg.model``.
    """
    # Function-scope import: the module only imports DictConfig at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    transformer_lm = TransformerLMModel(cfg.model, trainer=trainer)
    trainer.fit(transformer_lm)


if __name__ == '__main__':
    main()
36 | 


--------------------------------------------------------------------------------
/examples/nlp/question_answering/get_squad.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | import os
17 | import urllib.request
18 | 
19 | from nemo.utils import logging
20 | 
21 | 
class SquadDownloader:
    """Downloads the SQuAD v1.1 and v2.0 train/dev JSON files.

    Files are stored under ``<save_path>/squad/v1.1`` and
    ``<save_path>/squad/v2.0``.
    """

    def __init__(self, save_path):
        """Create the target directory layout and the URL -> file mapping.

        Args:
            save_path: directory under which the ``squad`` folder is created.
        """
        self.save_path = os.path.join(save_path, 'squad')

        # exist_ok makes construction idempotent and avoids the
        # check-then-create race of os.path.exists + os.makedirs.
        for version in ('v1.1', 'v2.0'):
            os.makedirs(os.path.join(self.save_path, version), exist_ok=True)

        base_url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/'
        # Maps each download URL to its destination relative to self.save_path.
        self.download_urls = {
            base_url + 'train-v1.1.json': 'v1.1/train-v1.1.json',
            base_url + 'dev-v1.1.json': 'v1.1/dev-v1.1.json',
            base_url + 'train-v2.0.json': 'v2.0/train-v2.0.json',
            base_url + 'dev-v2.0.json': 'v2.0/dev-v2.0.json',
        }

    def download(self):
        """Download every file in ``download_urls`` that is not already present."""
        for url, file in self.download_urls.items():
            target = os.path.join(self.save_path, file)

            logging.info('Downloading: %s', url)
            if os.path.isfile(target):
                logging.info('** Download file already exists, skipping download')
                continue

            # Write to a temporary name first and rename on success, so an
            # interrupted download never leaves a truncated file that a later
            # run would mistake for a complete one.
            partial = target + '.part'
            with urllib.request.urlopen(url) as response, open(partial, "wb") as handle:
                handle.write(response.read())
            os.replace(partial, target)
54 | 
55 | 
if __name__ == '__main__':
    # By default the data is stored next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    arg_parser = argparse.ArgumentParser(description='Download Squad')
    arg_parser.add_argument(
        '--destDir', default=script_dir, type=str, required=False, help='directory to store data',
    )
    cli_args = arg_parser.parse_args()

    logging.info(cli_args.destDir)
    SquadDownloader(cli_args.destDir).download()
69 | 


--------------------------------------------------------------------------------
/examples/nlp/question_answering/question_answering_squad.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import os
17 | 
18 | import pytorch_lightning as pl
19 | from omegaconf import DictConfig
20 | 
21 | from nemo.collections.nlp.models.question_answering.qa_model import QAModel
22 | from nemo.core.config import hydra_runner
23 | from nemo.utils import logging
24 | from nemo.utils.exp_manager import exp_manager
25 | 
26 | 
@hydra_runner(config_path="conf", config_name="question_answering_squad_config")
def main(cfg: DictConfig) -> None:
    """Train a QAModel, optionally test it, and optionally save it.

    Args:
        cfg: Hydra/OmegaConf configuration. Reads ``cfg.trainer``, the
            optional ``cfg.exp_manager`` section, and ``cfg.model``
            (``validation_ds``, optional ``test_ds``, ``nemo_path``).
    """
    # Function-scope import: the module only imports DictConfig at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))

    # Place prediction / n-best output files inside the experiment log dir.
    # test_ds is treated as optional further below, so a missing section is
    # skipped here instead of being dereferenced unconditionally.
    for ds_name in ("validation_ds", "test_ds"):
        infer_dataset = cfg.model.get(ds_name, None)
        if infer_dataset is None:
            continue
        if infer_dataset.output_prediction_file is not None:
            infer_dataset.output_prediction_file = os.path.join(log_dir, infer_dataset.output_prediction_file)
        if infer_dataset.output_nbest_file is not None:
            infer_dataset.output_nbest_file = os.path.join(log_dir, infer_dataset.output_nbest_file)

    question_answering_model = QAModel(cfg.model, trainer=trainer)
    trainer.fit(question_answering_model)

    test_ds = cfg.model.get("test_ds", None)
    if test_ds is not None and test_ds.file is not None:
        # Testing uses a fresh trainer with at most one GPU.
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        trainer = pl.Trainer(gpus=gpu)
        if question_answering_model.prepare_test(trainer):
            trainer.test(question_answering_model)

    # Export only when a destination path was configured.
    if cfg.model.nemo_path:
        question_answering_model.save_to(cfg.model.nemo_path)


if __name__ == '__main__':
    main()
54 | 


--------------------------------------------------------------------------------
/examples/speaker_recognition/speaker_reco.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytorch_lightning as pl
18 | from omegaconf.listconfig import ListConfig
19 | from pytorch_lightning import seed_everything
20 | 
21 | from nemo.collections.asr.models import EncDecSpeakerLabelModel
22 | from nemo.core.config import hydra_runner
23 | from nemo.utils import logging
24 | from nemo.utils.exp_manager import exp_manager
25 | 
26 | """
27 | Basic run (on GPU for 10 epochs for 2 class training):
28 | EXP_NAME=sample_run
29 | python ./speaker_reco.py --config-path='conf' --config-name='SpeakerNet_recognition_3x2x512.yaml' \
30 |     trainer.max_epochs=10  \
31 |     model.train_ds.batch_size=64 model.validation_ds.batch_size=64 \
32 |     model.train_ds.manifest_filepath="<train_manifest>" model.validation_ds.manifest_filepath="<dev_manifest>" \
33 |     model.test_ds.manifest_filepath="<test_manifest>" \
34 |     trainer.gpus=1 \
35 |     model.decoder.params.num_classes=2 \
36 |     exp_manager.name=$EXP_NAME +exp_manager.use_datetime_version=False \
37 |     exp_manager.exp_dir='./speaker_exps'
38 | 
39 | See https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb for notebook tutorial
40 | """
41 | 
42 | seed_everything(42)
43 | 
44 | 
@hydra_runner(config_path="conf", config_name="SpeakerNet_recognition_3x2x512.yaml")
def main(cfg):
    """Train an EncDecSpeakerLabelModel, save it, and optionally test it.

    The trained model is written to ``<log_dir>/../spkr.nemo``, where
    ``log_dir`` is the value returned by ``exp_manager``.

    Args:
        cfg: Hydra/OmegaConf configuration providing ``trainer``, ``model``
            and, optionally, ``exp_manager`` sections.
    """
    # Function-scope import: the module does not import OmegaConf at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))
    speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer)
    trainer.fit(speaker_model)
    model_path = os.path.join(log_dir, '..', 'spkr.nemo')
    speaker_model.save_to(model_path)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        # Testing uses a fresh trainer with at most one GPU.
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        trainer = pl.Trainer(gpus=gpu)
        if speaker_model.prepare_test(trainer):
            trainer.test(speaker_model)


if __name__ == '__main__':
    main()
65 | 


--------------------------------------------------------------------------------
/examples/speaker_recognition/spkr_get_emb.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytorch_lightning as pl
18 | from omegaconf.listconfig import ListConfig
19 | from pytorch_lightning import seed_everything
20 | 
21 | from nemo.collections.asr.models import ExtractSpeakerEmbeddingsModel
22 | from nemo.core.config import hydra_runner
23 | from nemo.utils import logging
24 | from nemo.utils.exp_manager import exp_manager
25 | 
26 | """
27 | To extract embeddings
28 | Place pretrained model in ${EXP_DIR}/${EXP_NAME} with spkr.nemo
29 |     python spkr_get_emb.py --config-path='conf' --config-name='SpeakerNet_verification_3x2x512.yaml' \
30 |         +model.test_ds.manifest_filepath="<test_manifest_file>" \
31 |         +model.test_ds.sample_rate=16000 \
32 |         +model.test_ds.labels=null \
33 |         +model.test_ds.batch_size=1 \
34 |         +model.test_ds.shuffle=False \
35 |         +model.test_ds.time_length=8 \
36 |         exp_manager.name=${EXP_NAME} \
37 |         exp_manager.exp_dir=${EXP_DIR} \
38 |         trainer.gpus=1 
39 | 
40 | See https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb for notebook tutorial
41 | """
42 | 
43 | seed_everything(42)
44 | 
45 | 
@hydra_runner(config_path="conf", config_name="config")
def main(cfg):
    """Restore a speaker model and run it over the test set.

    The model is restored from ``<log_dir>/../spkr.nemo``, where ``log_dir``
    is the value returned by ``exp_manager``.

    Args:
        cfg: Hydra/OmegaConf configuration providing ``trainer``,
            ``model.test_ds`` and, optionally, ``exp_manager`` sections.
    """
    # Function-scope import: the module does not import OmegaConf at the top.
    # OmegaConf.to_yaml replaces the deprecated cfg.pretty().
    from omegaconf import OmegaConf

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
    # Force single-GPU, non-distributed execution when more than one GPU is
    # requested, either as a list of indices or as a count.
    if (isinstance(cfg.trainer.gpus, ListConfig) and len(cfg.trainer.gpus) > 1) or (
        isinstance(cfg.trainer.gpus, (int, str)) and int(cfg.trainer.gpus) > 1
    ):
        logging.info("changing gpus to 1 to minimize DDP issues while extracting embeddings")
        cfg.trainer.gpus = 1
        cfg.trainer.distributed_backend = None
    trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))
    model_path = os.path.join(log_dir, '..', 'spkr.nemo')
    speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path)
    speaker_model.setup_test_data(cfg.model.test_ds)
    trainer.test(speaker_model)


if __name__ == '__main__':
    main()
66 | 


--------------------------------------------------------------------------------
/examples/tts/conf/waveglow.yaml:
--------------------------------------------------------------------------------
  1 | name: &name "WaveGlow"
  2 | sample_rate: &sr 22050
  3 | n_fft: &n_fft 1024
  4 | n_mels: &n_mels 80
  5 | fmax: &fmax null
  6 | pad_value: &pad_value -11.52
  7 | train_dataset: ???
  8 | validation_datasets: ???
  9 | 
 10 | model:
 11 |   sigma: 1.0
 12 |   train_ds:
 13 |     dataset:
 14 |       cls: "nemo.collections.tts.data.datalayers.AudioDataset"
 15 |       params:
 16 |         manifest_filepath: ${train_dataset}
 17 |         max_duration: null
 18 |         min_duration: 0.1
 19 |         n_segments: 16000
 20 |         trim: false
 21 |     dataloader_params:
 22 |       drop_last: false
 23 |       shuffle: true
 24 |       batch_size: 12
 25 |       num_workers: 4
 26 | 
 27 |   validation_ds:
 28 |     dataset:
 29 |       cls: "nemo.collections.tts.data.datalayers.AudioDataset"
 30 |       params:
 31 |         manifest_filepath: ${validation_datasets}
 32 |         max_duration: null
 33 |         min_duration: 0.1
 34 |         n_segments: -1
 35 |         trim: false
 36 |     dataloader_params:
 37 |       drop_last: false
 38 |       shuffle: false
 39 |       batch_size: 12
 40 |       num_workers: 4
 41 | 
 42 |   preprocessor:
 43 |     cls: nemo.collections.asr.parts.features.FilterbankFeatures
 44 |     params:
 45 |       dither: 0.0
 46 |       nfilt: *n_mels
 47 |       frame_splicing: 1
 48 |       highfreq: *fmax
 49 |       log: true
 50 |       log_zero_guard_type: clamp
 51 |       log_zero_guard_value: 1e-05
 52 |       lowfreq: 0
 53 |       mag_power: 1.0
 54 |       n_fft: *n_fft
 55 | 
 56 |       # Waveglow is currently hardcoded to these values for window size and stride
 57 |       # Changing these parameters is not recommended
 58 |       n_window_size: 1024
 59 |       n_window_stride: 256
 60 | 
 61 |       normalize: null
 62 |       pad_to: 16
 63 |       pad_value: *pad_value
 64 |       preemph: null
 65 |       sample_rate: *sr
 66 |       stft_conv: true
 67 |       window: hann
 68 | 
 69 |   waveglow:
 70 |     cls: nemo.collections.tts.modules.waveglow.WaveGlowModule
 71 |     params:
 72 |       n_early_every: 4
 73 |       n_early_size: 2
 74 |       n_flows: 12
 75 |       n_group: 8
 76 |       n_mel_channels: *n_mels
 77 |       n_wn_channels: 512
 78 |       n_wn_layers: 8
 79 |       wn_kernel_size: 3
 80 | 
 81 |   optim:
 82 |     name: adam
 83 |     lr: 1e-4
 84 | 
 85 | trainer:
 86 |   gpus: 1 # number of gpus
 87 |   max_epochs: ???
 88 |   num_nodes: 1
 89 |   distributed_backend: ddp
 90 |   accumulate_grad_batches: 1
 91 |   checkpoint_callback: False  # Provided by exp_manager
 92 |   logger: False  # Provided by exp_manager
 93 |   log_save_interval: 1000
 94 |   row_log_interval: 200
 95 |   check_val_every_n_epoch: 25
 96 |   precision: 16
 97 | 
 98 | exp_manager:
 99 |   exp_dir: null
100 |   name: *name
101 |   create_tensorboard_logger: True
102 |   create_checkpoint_callback: True
103 | 


--------------------------------------------------------------------------------
/examples/tts/glow_tts.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytorch_lightning as pl
16 | 
17 | from nemo.collections.common.callbacks import LogEpochTimeCallback
18 | from nemo.collections.tts.models import GlowTTSModel
19 | from nemo.core.config import hydra_runner
20 | from nemo.utils.exp_manager import exp_manager
21 | 
22 | 
@hydra_runner(config_path="conf", config_name="glow_tts")
def main(cfg):
    """Train a GlowTTSModel with learning-rate and epoch-time logging callbacks.

    Args:
        cfg: Hydra/OmegaConf configuration providing ``trainer``, ``model``
            and, optionally, ``exp_manager`` sections.
    """
    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager_cfg = cfg.get("exp_manager", None)
    exp_manager(pl_trainer, exp_manager_cfg)

    glow_tts_model = GlowTTSModel(cfg=cfg.model, trainer=pl_trainer)

    # Register the extra callbacks after the model has been constructed.
    pl_trainer.callbacks.extend([pl.callbacks.LearningRateLogger(), LogEpochTimeCallback()])
    pl_trainer.fit(glow_tts_model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
36 | 


--------------------------------------------------------------------------------
/examples/tts/squeezewave.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytorch_lightning as pl
16 | 
17 | from nemo.collections.common.callbacks import LogEpochTimeCallback
18 | from nemo.collections.tts.models import SqueezeWaveModel
19 | from nemo.core.config import hydra_runner
20 | from nemo.utils.exp_manager import exp_manager
21 | 
22 | 
@hydra_runner(config_path="conf", config_name="squeezewave")
def main(cfg):
    """Train a SqueezeWaveModel from the ``squeezewave`` config found under ``conf``.

    The trainer is built from ``cfg.trainer`` and handed to ``exp_manager``
    together with the optional ``exp_manager`` config section. An epoch-time
    logger is appended to the trainer callbacks before fitting the model.

    Args:
        cfg: configuration object supplied by ``hydra_runner``; ``cfg.trainer``
            and ``cfg.model`` are read, ``exp_manager`` is optional.
    """
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    model = SqueezeWaveModel(cfg=cfg.model, trainer=trainer)

    # Extra callbacks are attached after exp_manager has processed the trainer.
    extra_callbacks = [LogEpochTimeCallback()]
    trainer.callbacks.extend(extra_callbacks)

    trainer.fit(model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
35 | 


--------------------------------------------------------------------------------
/examples/tts/tacotron2.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytorch_lightning as pl
16 | 
17 | from nemo.collections.common.callbacks import LogEpochTimeCallback
18 | from nemo.collections.tts.models import Tacotron2Model
19 | from nemo.core.config import hydra_runner
20 | from nemo.utils.exp_manager import exp_manager
21 | 
22 | 
@hydra_runner(config_path="conf", config_name="tacotron2")
def main(cfg):
    """Train a Tacotron2Model from the ``tacotron2`` config found under ``conf``.

    The trainer is built from ``cfg.trainer`` and handed to ``exp_manager``
    together with the optional ``exp_manager`` config section. A learning-rate
    logger and an epoch-time logger are then appended to the trainer callbacks
    before fitting the model.

    Args:
        cfg: configuration object supplied by ``hydra_runner``; ``cfg.trainer``
            and ``cfg.model`` are read, ``exp_manager`` is optional.
    """
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    model = Tacotron2Model(cfg=cfg.model, trainer=trainer)

    # Extra callbacks are attached after exp_manager has processed the trainer.
    extra_callbacks = [pl.callbacks.LearningRateLogger(), LogEpochTimeCallback()]
    trainer.callbacks.extend(extra_callbacks)

    trainer.fit(model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
36 | 


--------------------------------------------------------------------------------
/examples/tts/waveglow.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytorch_lightning as pl
16 | 
17 | from nemo.collections.common.callbacks import LogEpochTimeCallback
18 | from nemo.collections.tts.models import WaveGlowModel
19 | from nemo.core.config import hydra_runner
20 | from nemo.utils.exp_manager import exp_manager
21 | 
22 | 
@hydra_runner(config_path="conf", config_name="waveglow")
def main(cfg):
    """Train a WaveGlowModel from the ``waveglow`` config found under ``conf``.

    The trainer is built from ``cfg.trainer`` and handed to ``exp_manager``
    together with the optional ``exp_manager`` config section. An epoch-time
    logger is appended to the trainer callbacks before fitting the model.

    Args:
        cfg: configuration object supplied by ``hydra_runner``; ``cfg.trainer``
            and ``cfg.model`` are read, ``exp_manager`` is optional.
    """
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    model = WaveGlowModel(cfg=cfg.model, trainer=trainer)

    # Extra callbacks are attached after exp_manager has processed the trainer.
    extra_callbacks = [LogEpochTimeCallback()]
    trainer.callbacks.extend(extra_callbacks)

    trainer.fit(model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
35 | 


--------------------------------------------------------------------------------
/nemo/README.md:
--------------------------------------------------------------------------------
 1 | NeMo (**Ne**ural **Mo**dules) is a toolkit for creating AI applications built around **neural modules**, conceptual blocks of neural networks that take *typed* inputs and produce *typed* outputs.
 2 | 
 3 | **NeMo Core** provides common APIs all modules and models have to implement.
 4 | 
 5 | **NeMo Collections**
 6 | 
 7 | * ASR - collection of modules and models for building speech recognition networks
 8 | * TTS - collection of modules and models for building speech synthesis networks
 9 | * NLP - collection of modules and models for building NLP networks
10 | 


--------------------------------------------------------------------------------
/nemo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import os
17 | 
18 | from .package_info import (
19 |     __contact_emails__,
20 |     __contact_names__,
21 |     __description__,
22 |     __download_url__,
23 |     __homepage__,
24 |     __keywords__,
25 |     __license__,
26 |     __package_name__,
27 |     __repository_url__,
28 |     __shortversion__,
29 |     __version__,
30 | )
31 | 
32 | if "NEMO_PACKAGE_BUILDING" not in os.environ:
33 |     from nemo import core
34 |     from nemo import utils
35 |     from nemo import collections
36 | 


--------------------------------------------------------------------------------
/nemo/collections/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.asr import data, losses, models, modules
16 | from nemo.package_info import __version__
17 | 
# Set collection version equal to NeMo version.
# NOTE(review): this binds the name `__version` (no trailing underscores);
# `__version__` itself is already in scope through the package_info import.
# Confirm whether `__version__` was the intended target.
__version = __version__

# Authorship.
__author__ = "NVIDIA Corporation"

# Set collection description.
__description__ = "Automatic Speech Recognition collection"
26 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/losses/angularloss.py:
--------------------------------------------------------------------------------
 1 | # ! /usr/bin/python
 2 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | 
18 | from nemo.core.classes import Loss, Typing, typecheck
19 | from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType
20 | 
21 | __all__ = ['AngularSoftmaxLoss']
22 | 
23 | 
class AngularSoftmaxLoss(Loss, Typing):
    """
    Computes ArcFace Angular softmax angle loss
    reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.pdf

    Args:
        scale: factor that multiplies the cosine values before exponentiation
        margin: value added to the target-class angle (the ``acos`` of the
            clamped target logit) before the cosine is taken again
    """

    @property
    def input_types(self):
        """Input types definitions for AngularSoftmaxLoss.
        """
        return {
            "logits": NeuralType(('B', 'D'), LogitsType()),
            "labels": NeuralType(('B',), LabelsType()),
        }

    @property
    def output_types(self):
        """Output types definitions for AngularSoftmaxLoss.
        loss:
            NeuralType(None)
        """
        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self, scale=20.0, margin=1.35):
        super().__init__()

        # Keeps the clamped cosine strictly inside (-1, 1) before acos is applied.
        self.eps = 1e-7
        self.scale = scale
        self.margin = margin

    @typecheck()
    def forward(self, logits, labels):
        """Return the negated mean of the per-sample angular-margin log-probabilities.

        Args:
            logits: 2-D tensor indexed as ``[sample, class]``; the target entries
                are clamped to ``(-1, 1)`` and passed through ``acos``
                (presumably cosine similarities - confirm with the caller).
            labels: 1-D tensor holding the target class index of every sample.

        Returns:
            Scalar tensor ``-mean(numerator - log(denominator))``.
        """
        # Pick logits[i, labels[i]] directly instead of materialising a
        # (batch x batch) matrix and reading its diagonal.
        target_index = labels.long().unsqueeze(1)
        target_cos = logits.gather(1, target_index).squeeze(1)
        numerator = self.scale * torch.cos(
            torch.acos(torch.clamp(target_cos, -1.0 + self.eps, 1 - self.eps)) + self.margin
        )

        # Boolean mask of the target class per row; replaces the per-sample
        # Python loop that concatenated the slices around the target column.
        class_ids = torch.arange(logits.size(1), device=logits.device).unsqueeze(0)
        target_mask = class_ids == target_index
        non_target = torch.exp(self.scale * logits).masked_fill(target_mask, 0.0)

        denominator = torch.exp(numerator) + torch.sum(non_target, dim=1)
        L = numerator - torch.log(denominator)
        return -torch.mean(L)
69 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.asr.models.asr_model import ASRModel
16 | from nemo.collections.asr.models.classification_models import EncDecClassificationModel
17 | from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE
18 | from nemo.collections.asr.models.ctc_models import EncDecCTCModel
19 | from nemo.collections.asr.models.label_models import EncDecSpeakerLabelModel, ExtractSpeakerEmbeddingsModel
20 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/models/asr_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from abc import ABC, abstractmethod
15 | from typing import Dict, List
16 | 
17 | import torch
18 | 
19 | from nemo.core.classes import ModelPT
20 | 
21 | __all__ = ['ASRModel']
22 | 
23 | 
class ASRModel(ModelPT, ABC):
    """Abstract ASR model: declares ``transcribe`` and aggregates per-step
    loss / WER statistics at the end of validation and test epochs."""

    @abstractmethod
    def transcribe(self, paths2audio_files: List[str], batch_size: int = 4) -> List[str]:
        """
        Transcribe a list of audio files.

        Args:
            paths2audio_files: paths to the audio fragments to be transcribed
            batch_size: batch size argument, defaults to 4; how it is used is
                up to the implementing subclass

        Returns:
            transcription texts
        """

    def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0):
        """Reduce validation step outputs to a mean loss and a WER ratio.

        Args:
            outputs: iterable of dicts with ``val_loss``, ``val_wer_num`` and
                ``val_wer_denom`` tensors
            dataloader_idx: accepted for interface compatibility; not read here

        Returns:
            dict with ``val_loss`` and a ``log`` dict of the reduced metrics
        """

        def stacked(key):
            return torch.stack([step[key] for step in outputs])

        mean_loss = stacked('val_loss').mean()
        total_wer_num = stacked('val_wer_num').sum()
        total_wer_denom = stacked('val_wer_denom').sum()
        metrics = {'validation_loss': mean_loss, 'validation_wer': total_wer_num / total_wer_denom}
        return {'val_loss': mean_loss, 'log': metrics}

    def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0):
        """Reduce test step outputs to a mean loss and a WER ratio.

        Args:
            outputs: iterable of dicts with ``test_loss``, ``test_wer_num`` and
                ``test_wer_denom`` tensors
            dataloader_idx: accepted for interface compatibility; not read here

        Returns:
            dict with ``test_loss`` and a ``log`` dict of the reduced metrics
        """

        def stacked(key):
            return torch.stack([step[key] for step in outputs])

        mean_loss = stacked('test_loss').mean()
        total_wer_num = stacked('test_wer_num').sum()
        total_wer_denom = stacked('test_wer_denom').sum()
        metrics = {'test_loss': mean_loss, 'test_wer': total_wer_num / total_wer_denom}
        return {'test_loss': mean_loss, 'log': metrics}
50 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.asr.modules.audio_preprocessing import (
16 |     AudioToMelSpectrogramPreprocessor,
17 |     AudioToMFCCPreprocessor,
18 |     CropOrPadSpectrogramAugmentation,
19 |     SpectrogramAugmentation,
20 | )
21 | from nemo.collections.asr.modules.conv_asr import (
22 |     ConvASRDecoder,
23 |     ConvASRDecoderClassification,
24 |     ConvASREncoder,
25 |     SpeakerDecoder,
26 | )
27 | 


--------------------------------------------------------------------------------
/nemo/collections/asr/parts/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/common/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.collections.common.callbacks
16 | from nemo.collections.common import losses, parts, tokenizers
17 | from nemo.package_info import __version__
18 | 
# Set collection version equal to NeMo version.
# NOTE(review): this binds the name `__version` (no trailing underscores);
# `__version__` itself is already in scope through the package_info import.
# Confirm whether `__version__` was the intended target.
__version = __version__

# Authorship.
__author__ = "NVIDIA Corporation"

# Set collection description.
__description__ = "Common collection"
27 | 


--------------------------------------------------------------------------------
/nemo/collections/common/callbacks/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback
16 | 


--------------------------------------------------------------------------------
/nemo/collections/common/callbacks/callbacks.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import time
15 | 
16 | from pytorch_lightning.callbacks.base import Callback
17 | from pytorch_lightning.utilities import rank_zero_only
18 | 
19 | 
class LogEpochTimeCallback(Callback):
    """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log

    The start time is stored in ``on_epoch_start`` and the elapsed wall-clock
    time is sent to ``trainer.logger`` as ``epoch_time`` in ``on_epoch_end``.
    Both hooks are wrapped with ``rank_zero_only``.
    """

    @rank_zero_only
    def on_epoch_start(self, trainer, pl_module):
        """Remember the wall-clock time at which the epoch started."""
        self.epoch_start = time.time()

    @rank_zero_only
    def on_epoch_end(self, trainer, pl_module):
        """Log the epoch duration under ``epoch_time`` at ``trainer.global_step``.

        Nothing is logged when no start time was recorded or when the trainer
        has no logger attached; both cases used to raise ``AttributeError``.
        """
        curr_time = time.time()
        epoch_start = getattr(self, "epoch_start", None)
        if epoch_start is None or trainer.logger is None:
            return
        duration = curr_time - epoch_start
        trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step)
33 | 


--------------------------------------------------------------------------------
/nemo/collections/common/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.losses.aggregator import AggregatorLoss
16 | from nemo.collections.common.losses.cross_entropy import CrossEntropyLoss
17 | from nemo.collections.common.losses.mse_loss import MSELoss
18 | from nemo.collections.common.losses.smoothed_cross_entropy import SmoothedCrossEntropyLoss
19 | from nemo.collections.common.losses.spanning_loss import SpanningLoss
20 | 


--------------------------------------------------------------------------------
/nemo/collections/common/losses/aggregator.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import List
16 | 
17 | import torch
18 | 
19 | from nemo.core.classes import Loss, typecheck
20 | from nemo.core.neural_types import LossType, NeuralType
21 | 
22 | __all__ = ['AggregatorLoss']
23 | 
24 | 
class AggregatorLoss(Loss):
    """
    Sums several losses into one.

    Args:
        num_inputs: number of input losses
        weights: a list of coefficient for merging losses
    """

    @property
    def input_types(self):
        """Returns definitions of module input ports.

        One port per loss, named ``loss_1`` ... ``loss_<num_inputs>``.
        """
        return {"loss_" + str(i + 1): NeuralType(elements_type=LossType()) for i in range(self._num_losses)}

    @property
    def output_types(self):
        """Returns definitions of module output ports.
        """
        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self, num_inputs: int = 2, weights: List[float] = None):
        """
        Args:
            num_inputs: number of input losses
            weights: optional coefficients, one per input loss

        Raises:
            ValueError: if ``weights`` is given and its length differs from ``num_inputs``
        """
        super().__init__()
        self._num_losses = num_inputs
        if weights is not None and len(weights) != num_inputs:
            raise ValueError("Length of weights should be equal to the number of inputs (num_inputs)")

        self._weights = weights

    @typecheck()
    def forward(self, **kwargs):
        """Return the (optionally weighted) sum of the losses passed as keyword arguments.

        The i-th weight is applied to the i-th loss in port order
        (``loss_1``, ``loss_2``, ..., ``loss_10``, ...).
        """
        # Plain lexicographic sorting would put "loss_10" before "loss_2" and
        # pair losses with the wrong weights once there are ten or more inputs;
        # ordering by (length, name) keeps the numeric port order.
        ordered_names = sorted(kwargs.keys(), key=lambda name: (len(name), name))
        values = [kwargs[name] for name in ordered_names]
        loss = torch.zeros_like(values[0])
        for loss_idx, loss_value in enumerate(values):
            if self._weights is not None:
                loss = loss.add(loss_value, alpha=self._weights[loss_idx])
            else:
                loss = loss.add(loss_value)
        return loss
68 | 


--------------------------------------------------------------------------------
/nemo/collections/common/losses/mse_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from torch import Tensor, nn
16 | 
17 | from nemo.core.classes import Serialization, Typing, typecheck
18 | from nemo.core.neural_types import LabelsType, LossType, NeuralType, RegressionValuesType
19 | 
20 | __all__ = ['MSELoss']
21 | 
22 | 
class MSELoss(nn.MSELoss, Serialization, Typing):
    """
    Mean squared error loss: ``torch.nn.MSELoss`` with neural type definitions
    for its inputs and output.
    """

    @property
    def input_types(self):
        """Returns definitions of module input ports.
        """
        batch_axes = ('B',)
        preds_type = NeuralType(batch_axes, RegressionValuesType())
        labels_type = NeuralType(batch_axes, LabelsType())
        return {"preds": preds_type, "labels": labels_type}

    @property
    def output_types(self):
        """Returns definitions of module output ports.
        """
        loss_type = NeuralType(elements_type=LossType())
        return {"loss": loss_type}

    def __init__(self, reduction: str = 'mean'):
        """
        Args:
            reduction: reduction mode forwarded unchanged to ``torch.nn.MSELoss``
        """
        super().__init__(reduction=reduction)

    @typecheck()
    def forward(self, preds: Tensor, labels: Tensor) -> Tensor:
        """
        Args:
            preds: predicted values
            labels: ground truth values

        Returns:
            the result of the parent class ``forward`` on ``(preds, labels)``
        """
        loss = super().forward(preds, labels)
        return loss
58 | 


--------------------------------------------------------------------------------
/nemo/collections/common/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.metrics.classification_accuracy import TopKClassificationAccuracy, compute_topk_accuracy
16 | 


--------------------------------------------------------------------------------
/nemo/collections/common/parts/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.parts.multi_layer_perceptron import MultiLayerPerceptron
16 | from nemo.collections.common.parts.transformer_utils import *
17 | from nemo.collections.common.parts.utils import *
18 | 


--------------------------------------------------------------------------------
/nemo/collections/common/parts/multi_layer_perceptron.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | 
18 | class MultiLayerPerceptron(torch.nn.Module):
19 |     """
20 |     A simple MLP that can either be used independently or put on top
21 |     of pretrained models (such as BERT) and act as a classifier.
22 |     Args:
23 |         hidden_size (int): the size of each layer
24 |         num_classes (int): number of output classes
25 |         num_layers (int): number of layers
26 |         activation (str): type of activations for layers in between
27 |         log_softmax (bool): whether to add a log_softmax layer before output
28 |     """
29 | 
30 |     def __init__(
31 |         self,
32 |         hidden_size: int,
33 |         num_classes: int,
34 |         num_layers: int = 2,
35 |         activation: str = 'relu',
36 |         log_softmax: bool = True,
37 |     ):
38 |         super().__init__()
39 |         self.layers = 0
40 |         for _ in range(num_layers - 1):
41 |             layer = torch.nn.Linear(hidden_size, hidden_size)
42 |             setattr(self, f'layer{self.layers}', layer)
43 |             setattr(self, f'layer{self.layers + 1}', getattr(torch, activation))
44 |             self.layers += 2
45 |         layer = torch.nn.Linear(hidden_size, num_classes)
46 |         setattr(self, f'layer{self.layers}', layer)
47 |         self.layers += 1
48 |         self.log_softmax = log_softmax
49 | 
50 |     @property
51 |     def last_linear_layer(self):
52 |         return getattr(self, f'layer{self.layers - 1}')
53 | 
54 |     def forward(self, hidden_states):
55 |         output_states = hidden_states[:]
56 |         for i in range(self.layers):
57 |             output_states = getattr(self, f'layer{i}')(output_states)
58 | 
59 |         if self.log_softmax:
60 |             output_states = torch.log_softmax(output_states, dim=-1)
61 |         return output_states
62 | 


--------------------------------------------------------------------------------
/nemo/collections/common/parts/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import math
16 | import os
17 | from typing import List
18 | 
19 | __all__ = ['if_exist', '_compute_softmax']
20 | 
21 | 
22 | def if_exist(outfold: str, files: List[str]):
23 |     """
24 |     Returns true if all given files exist in the given folder
25 |     Args:
26 |         outfold: folder path
27 |         files: list of file names relative to outfold
28 |     """
29 |     if not os.path.exists(outfold):
30 |         return False
31 |     for file in files:
32 |         if not os.path.exists(f'{outfold}/{file}'):
33 |             return False
34 |     return True
35 | 
36 | 
37 | def _compute_softmax(scores):
38 |     """Compute softmax probability over raw logits."""
39 |     if not scores:
40 |         return []
41 | 
42 |     max_score = None
43 |     for score in scores:
44 |         if max_score is None or score > max_score:
45 |             max_score = score
46 | 
47 |     exp_scores = []
48 |     total_sum = 0.0
49 |     for score in scores:
50 |         x = math.exp(score - max_score)
51 |         exp_scores.append(x)
52 |         total_sum += x
53 | 
54 |     probs = []
55 |     for score in exp_scores:
56 |         probs.append(score / total_sum)
57 |     return probs
58 | 


--------------------------------------------------------------------------------
/nemo/collections/common/tokenizers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.tokenizers.char_tokenizer import CharTokenizer
16 | from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
17 | from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer
18 | from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec
19 | from nemo.collections.common.tokenizers.word_tokenizer import WordTokenizer
20 | 


--------------------------------------------------------------------------------
/nemo/collections/common/tokenizers/huggingface/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
16 | 


--------------------------------------------------------------------------------
/nemo/collections/common/tokenizers/tokenizer_spec.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import List
17 | 
18 | __all__ = ['TokenizerSpec']
19 | 
20 | 
21 | class TokenizerSpec(ABC):
22 |     """
23 |     Inherit this class to implement a new tokenizer.
24 |     """
25 | 
26 |     @abstractmethod
27 |     def text_to_tokens(self, text):
28 |         pass
29 | 
30 |     @abstractmethod
31 |     def tokens_to_text(self, tokens):
32 |         pass
33 | 
34 |     @abstractmethod
35 |     def tokens_to_ids(self, tokens):
36 |         pass
37 | 
38 |     @abstractmethod
39 |     def ids_to_tokens(self, ids):
40 |         pass
41 | 
42 |     @abstractmethod
43 |     def text_to_ids(self, text):
44 |         pass
45 | 
46 |     @abstractmethod
47 |     def ids_to_text(self, ids):
48 |         pass
49 | 
50 |     def add_special_tokens(self, special_tokens: List[str]):
51 |         raise NotImplementedError("To be implemented")
52 | 
53 |     @property
54 |     def name(self):
55 |         return type(self).__name__
56 | 


--------------------------------------------------------------------------------
/nemo/collections/common/tokenizers/word_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Optional
16 | 
17 | from nemo.collections.common.tokenizers.char_tokenizer import CharTokenizer
18 | 
19 | __all__ = ['WordTokenizer']
20 | 
21 | 
22 | class WordTokenizer(CharTokenizer):
23 |     "Tokenizes at word boundary"
24 | 
25 |     def __init__(
26 |         self,
27 |         vocab_file: str,
28 |         mask_token: Optional[str] = None,
29 |         bos_token: Optional[str] = None,
30 |         eos_token: Optional[str] = None,
31 |         pad_token: Optional[str] = None,
32 |         sep_token: Optional[str] = None,
33 |         cls_token: Optional[str] = None,
34 |         unk_token: Optional[str] = None,
35 |     ):
36 |         """
37 |         Args:
38 |             vocab_file: path to file with vocabulary which consists
39 |                 of characters separated by \n
40 |             mask_token: mask token 
41 |             bos_token: the beginning of sequence token
42 |             eos_token: the end of sequence token. Usually equal to sep_token
43 |             pad_token: token to use for padding
44 |             sep_token: token used for separating sequences
45 |             cls_token: class token. Usually equal to bos_token
46 |             unk_token: token to use for unknown tokens
47 |         """
48 | 
49 |         super().__init__(
50 |             vocab_file=vocab_file,
51 |             mask_token=mask_token,
52 |             bos_token=bos_token,
53 |             eos_token=eos_token,
54 |             pad_token=pad_token,
55 |             unk_token=unk_token,
56 |             sep_token=sep_token,
57 |             cls_token=cls_token,
58 |         )
59 | 
60 |     def text_to_tokens(self, text):
61 |         token_candidates = text.strip().split()
62 |         tokens = []
63 |         for token in token_candidates:
64 |             if token in self.vocab:
65 |                 tokens.append(token)
66 |             else:
67 |                 tokens.append(self.unk_token)
68 |         return tokens
69 | 
70 |     def ids_to_text(self, ids):
71 |         ids_ = [id_ for id_ in ids if id_ not in self.special_tokens]
72 |         return " ".join(self.ids_to_tokens(ids_))
73 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.cv import datasets, losses, models, modules
16 | from nemo.package_info import __version__
17 | 
18 | # Set collection version equal to NeMo version.
19 | __version = __version__
20 | 
21 | # Authorship.
22 | __author__ = "NVIDIA Corporation"
23 | 
24 | # Set collection description.
25 | __description__ = "Computer Vision collection"
26 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.cv.datasets.mnist_dataset import MNISTDataset, MNISTDatasetConfig
16 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.cv.losses.nll_loss import NLLLoss
16 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/losses/nll_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Optional
16 | 
17 | from torch.nn import NLLLoss as torch_NLLLoss
18 | 
19 | from nemo.core.classes import Serialization, Typing, typecheck
20 | from nemo.core.neural_types import ClassificationTarget, LogprobsType, LossType, NeuralType
21 | from nemo.utils.decorators import experimental
22 | 
23 | 
24 | @experimental
25 | class NLLLoss(torch_NLLLoss, Serialization, Typing):
26 |     """ Class representing a simple NLL loss. """
27 | 
28 |     def __init__(self, name: Optional[str] = None):
29 |         """
30 |         Constructor.
31 | 
32 |         Args:
33 |             name: Name of the module (DEFAULT: None)
34 |         """
35 |         # Call the base constructors.
36 |         # Serialization.__init__(self, name=name)
37 |         torch_NLLLoss.__init__(self)
38 | 
39 |     @property
40 |     def input_types(self):
41 |         """ Returns definitions of module input ports. """
42 |         return {
43 |             "predictions": NeuralType(axes=('B', 'ANY'), elements_type=LogprobsType()),
44 |             "targets": NeuralType(axes=('B'), elements_type=ClassificationTarget()),
45 |         }
46 | 
47 |     @property
48 |     def output_types(self):
49 |         """ Returns definitions of module output ports. """
50 |         return {"loss": NeuralType(elements_type=LossType())}
51 | 
52 |     @typecheck()
53 |     def forward(self, predictions, targets):
54 |         return torch_NLLLoss().forward(input=predictions, target=targets)
55 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.cv.models.mnist_lenet5 import MNISTLeNet5, MNISTLeNet5Config
16 | 


--------------------------------------------------------------------------------
/nemo/collections/cv/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.cv.modules.lenet5 import LeNet5
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp import data, models, modules
16 | from nemo.package_info import __version__
17 | 
18 | # Set collection version equal to NeMo version.
19 | __version = __version__
20 | 
21 | # Authorship.
22 | __author__ = "NVIDIA Corporation"
23 | 
24 | # Set collection description.
25 | __description__ = "Natural Language Processing collection"
26 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.data.data_utils import *
16 | from nemo.collections.nlp.data.language_modeling.l2r_lm_dataset import L2RLanguageModelingDataset
17 | from nemo.collections.nlp.data.language_modeling.lm_bert_dataset import (
18 |     BertPretrainingDataset,
19 |     BertPretrainingPreprocessedDataloader,
20 | )
21 | from nemo.collections.nlp.data.question_answering_squad.qa_dataset import SquadDataset
22 | from nemo.collections.nlp.data.token_classification.token_classification_dataset import (
23 |     BertTokenClassificationDataset,
24 |     BertTokenClassificationInferDataset,
25 | )
26 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/data_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.data.data_utils.data_preprocessing import *
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/glue_benchmark/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.data.glue_benchmark.glue_benchmark_dataset import GLUEDataset
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/intent_slot_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.collections.nlp.data.intent_slot_classification.intent_slot_classification_dataset import (
17 |     IntentSlotClassificationDataset,
18 | )
19 | from nemo.collections.nlp.data.intent_slot_classification.intent_slot_classification_descriptor import (
20 |     IntentSlotDataDesc,
21 | )
22 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/language_modeling/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.data.language_modeling.l2r_lm_dataset import L2RLanguageModelingDataset
16 | from nemo.collections.nlp.data.language_modeling.lm_bert_dataset import (
17 |     BertPretrainingDataset,
18 |     BertPretrainingPreprocessedDataloader,
19 | )
20 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/language_modeling/l2r_lm_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Pytorch Dataset for training left-to-right language models."""
16 | from typing import Optional
17 | 
18 | import numpy as np
19 | from torch.utils.data import Dataset
20 | 
21 | from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec
22 | from nemo.collections.nlp.data.data_utils import dataset_to_ids
23 | 
24 | __all__ = ['L2RLanguageModelingDataset']
25 | 
26 | 
27 | class L2RLanguageModelingDataset(Dataset):
28 |     """
29 |     Dataset for training and evaluating left-to-right language models.
30 |     
31 |     Args:
32 |         tokenizer: tokenizer, such as WordTokenizer or CharTokenizer
33 |         dataset: path to data
34 |         max_seq_length: maximum sequence length (in tokens) of input tensors
35 |         batch_step: distance (in tokens) between two successive sequences of
36 |             the text. By default, it is equal to max_seq_length which corresponds
37 |             to splitting text into disjoint segments covering full dataset
38 |     """
39 | 
40 |     def __init__(
41 |         self,
42 |         tokenizer: TokenizerSpec,
43 |         dataset: str,
44 |         max_seq_length: Optional[int] = 512,
45 |         batch_step: Optional[int] = None,
46 |     ):
47 |         self.tokenizer = tokenizer
48 |         self.max_seq_length = max_seq_length
49 |         self.batch_step = batch_step or self.max_seq_length
50 |         ids = dataset_to_ids(dataset, tokenizer, add_bos_eos=False)
51 |         self.ids = np.array([j for i in ids for j in i])
52 | 
53 |     def __len__(self):
54 |         return (len(self.ids) - self.max_seq_length) // self.batch_step
55 | 
56 |     def __getitem__(self, idx):
57 |         left = idx * self.batch_step
58 |         right = left + self.max_seq_length
59 |         src_ids = self.ids[left:right]
60 |         labels = self.ids[left + 1 : right + 1]
61 |         src_mask = (src_ids != self.tokenizer.pad_id).astype(np.float32)
62 |         return src_ids, src_mask, labels
63 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/question_answering_squad/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/text_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.collections.nlp.data.text_classification.text_classification_dataset import (
17 |     TextClassificationDataset,
18 |     calc_class_weights,
19 | )
20 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/data/token_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.metrics.classification_report import ClassificationReport
16 | from nemo.collections.nlp.metrics.perplexity import Perplexity
17 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/metrics/perplexity.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | import torch
18 | from pytorch_lightning.metrics import TensorMetric
19 | 
20 | from nemo.utils import logging
21 | 
22 | __all__ = ['Perplexity']
23 | 
24 | 
class Perplexity(TensorMetric):
    """
    Metric that turns a language model loss into a perplexity value.

    The result is obtained by exponentiating the loss tensor.
    """

    def __init__(self):
        # Register with the base metric class under a fixed name.
        super().__init__(name="Perplexity")

    def forward(self, loss: torch.Tensor) -> torch.Tensor:
        """
        Args:
            loss: tensor holding the language model loss
        Returns:
            tensor equal to ``exp(loss)``
        """
        perplexity = torch.exp(loss)
        return perplexity
35 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.glue_benchmark.glue_benchmark_model import GLUEModel
16 | from nemo.collections.nlp.models.intent_slot_classification import IntentSlotClassificationModel
17 | from nemo.collections.nlp.models.language_modeling.bert_lm_model import BERTLMModel
18 | from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel
19 | from nemo.collections.nlp.models.question_answering.qa_model import QAModel
20 | from nemo.collections.nlp.models.text_classification import TextClassificationModel
21 | from nemo.collections.nlp.models.token_classification import PunctuationCapitalizationModel, TokenClassificationModel
22 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/glue_benchmark/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.glue_benchmark.glue_benchmark_model import GLUEModel
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/glue_benchmark/metrics_for_glue.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from typing import Dict, List
18 | 
19 | from scipy.stats import pearsonr, spearmanr
20 | from sklearn.metrics import f1_score, matthews_corrcoef
21 | 
22 | __all__ = ['compute_metrics']
23 | 
24 | 
def accuracy(preds: List[int], labels: List[int]):
    """
    Computes the fraction of predictions that are equal to their labels.

    Args:
        preds: model predictions (list or array-like)
        labels: golden labels, element-wise aligned with preds
    Returns:
        dict with the single key "acc" holding the mean of the element-wise matches
    """
    # Imported locally because this module has no file-level numpy import.
    import numpy as np

    # Two plain Python lists compare as a whole and yield one bool (which has no
    # .mean()); converting to arrays first guarantees an element-wise comparison.
    matches = np.asarray(preds) == np.asarray(labels)
    return {"acc": matches.mean()}
27 | 
28 | 
def acc_and_f1(preds: List[int], labels: List[int]):
    """
    Computes accuracy together with the F1 score.

    Args:
        preds: model predictions (list or array-like)
        labels: golden labels, element-wise aligned with preds
    Returns:
        dict with keys "acc" (mean of element-wise matches) and "f1"
        (result of ``f1_score(y_true=labels, y_pred=preds)``)
    """
    # Imported locally because this module has no file-level numpy import.
    import numpy as np

    # Convert before comparing: two plain lists would compare as a whole and
    # produce a single bool instead of an element-wise result.
    # The local is named `acc` so it does not shadow the module-level accuracy().
    acc = (np.asarray(preds) == np.asarray(labels)).mean()
    f1 = f1_score(y_true=labels, y_pred=preds)
    return {"acc": acc, "f1": f1}
33 | 
34 | 
def mcc(preds: List[int], labels: List[int]):
    """
    Computes the Matthews correlation coefficient.

    Args:
        preds: model predictions
        labels: golden labels
    Returns:
        dict with the single key "mcc"
    """
    # The labels are passed as the ground truth, the predictions second.
    coefficient = matthews_corrcoef(y_true=labels, y_pred=preds)
    return {"mcc": coefficient}
37 | 
38 | 
def pearson_and_spearman(preds: List[int], labels: List[int]):
    """
    Computes Pearson and Spearman correlations between predictions and labels.

    Args:
        preds: model predictions
        labels: golden labels
    Returns:
        dict with keys "pearson", "spearmanr" and "pear+spear av"
        (the arithmetic mean of the two correlations)
    """
    # Both scipy calls return a result whose first element is the correlation.
    correlations = {
        "pearson": pearsonr(preds, labels)[0],
        "spearmanr": spearmanr(preds, labels)[0],
    }
    correlations["pear+spear av"] = (correlations["pearson"] + correlations["spearmanr"]) / 2
    return correlations
43 | 
44 | 
def compute_metrics(task_name: str, preds: List[int], labels: List[int]) -> Dict[str, float]:
    """
    Selects and evaluates the metric function for a GLUE task.

    Args:
        task_name: GLUE task name
        preds: model predictions
        labels: golden labels
    Returns:
        dict of metric values produced by the selected metric function
    Raises:
        ValueError: if preds and labels differ in length
    """
    if len(preds) != len(labels):
        raise ValueError("Predictions and labels must have the same length")

    # (task names, metric function) pairs; any task not listed here is scored
    # with plain accuracy.
    task_metrics = (
        (('cola',), mcc),
        (('mrpc', 'qqp'), acc_and_f1),
        (('sts-b',), pearson_and_spearman),
    )
    metric_fn = next((fn for names, fn in task_metrics if task_name in names), accuracy)
    return metric_fn(preds, labels)
67 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/intent_slot_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.intent_slot_classification.intent_slot_classification_model import (
16 |     IntentSlotClassificationModel,
17 | )
18 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/language_modeling/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.language_modeling.bert_lm_model import BERTLMModel
16 | from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel
17 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/question_answering/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.question_answering.qa_model import QAModel
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/text_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.models.text_classification.text_classification_model import TextClassificationModel
16 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/models/token_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.collections.nlp.models.token_classification.punctuation_capitalization_model import (
17 |     PunctuationCapitalizationModel,
18 | )
19 | from nemo.collections.nlp.models.token_classification.token_classification_model import TokenClassificationModel
20 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.collections.nlp.modules.common import (
17 |     AlbertEncoder,
18 |     BertEncoder,
19 |     BertModule,
20 |     DistilBertEncoder,
21 |     MegatronBertEncoder,
22 |     RobertaEncoder,
23 |     SequenceClassifier,
24 |     SequenceRegression,
25 |     SequenceTokenClassifier,
26 |     get_lm_model,
27 |     get_megatron_lm_models_list,
28 |     get_pretrained_lm_models_list,
29 |     get_tokenizer,
30 |     get_tokenizer_list,
31 | )
32 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from nemo.collections.nlp.modules.common.bert_module import BertModule
18 | from nemo.collections.nlp.modules.common.huggingface import (
19 |     AlbertEncoder,
20 |     BertEncoder,
21 |     DistilBertEncoder,
22 |     RobertaEncoder,
23 | )
24 | from nemo.collections.nlp.modules.common.lm_utils import (
25 |     get_lm_model,
26 |     get_megatron_lm_models_list,
27 |     get_pretrained_lm_models_list,
28 | )
29 | from nemo.collections.nlp.modules.common.megatron import MegatronBertEncoder
30 | from nemo.collections.nlp.modules.common.sequence_classifier import SequenceClassifier
31 | from nemo.collections.nlp.modules.common.sequence_regression import SequenceRegression
32 | from nemo.collections.nlp.modules.common.sequence_token_classifier import SequenceTokenClassifier
33 | from nemo.collections.nlp.modules.common.token_classifier import BertPretrainingTokenClassifier, TokenClassifier
34 | from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer, get_tokenizer_list
35 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.modules.common.huggingface.albert import AlbertEncoder
16 | from nemo.collections.nlp.modules.common.huggingface.auto import AutoModelEncoder
17 | from nemo.collections.nlp.modules.common.huggingface.bert import BertEncoder
18 | from nemo.collections.nlp.modules.common.huggingface.distilbert import DistilBertEncoder
19 | from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import (
20 |     get_huggingface_lm_model,
21 |     get_huggingface_pretrained_lm_models_list,
22 | )
23 | from nemo.collections.nlp.modules.common.huggingface.roberta import RobertaEncoder
24 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/albert.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from transformers import AlbertModel
18 | 
19 | from nemo.collections.nlp.modules.common.bert_module import BertModule
20 | from nemo.core.classes import typecheck
21 | 
22 | __all__ = ['AlbertEncoder']
23 | 
24 | 
class AlbertEncoder(AlbertModel, BertModule):
    """
    NeMo-facing wrapper around the Huggingface transformers ``AlbertModel``.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids):
        # Delegate to the inherited forward and keep only the first element of its output.
        outputs = super().forward(
            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )
        return outputs[0]
34 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/auto.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from transformers import AutoModel, PreTrainedModel
18 | 
19 | from nemo.collections.nlp.modules.common.bert_module import BertModule
20 | from nemo.utils.decorators import experimental
21 | 
22 | __all__ = ['AutoModelEncoder']
23 | 
24 | 
@experimental
class AutoModelEncoder(PreTrainedModel, BertModule):
    """
    Wraps around the Huggingface transformers implementation repository for easy use within NeMo.

    The concrete model is loaded with ``AutoModel.from_pretrained`` and kept in
    ``self.lm_model``; its class is stored in ``self.type``.
    """

    def __init__(self, pretrained_model_name_or_path):
        """
        Args:
            pretrained_model_name_or_path: value passed to ``AutoModel.from_pretrained``
        """
        BertModule.__init__(self)
        lm_model = AutoModel.from_pretrained(pretrained_model_name_or_path)
        PreTrainedModel.__init__(self, config=lm_model.config)
        self.lm_model = lm_model
        self.type = type(lm_model)

    def forward(self, **kwargs):
        """
        Calls the wrapped model with only those keyword arguments its forward accepts.

        Args:
            **kwargs: candidate inputs; keys that are not parameters of the
                wrapped model's forward are silently dropped
        Returns:
            the first element of the wrapped model's output
        """
        # Imported locally so the fix does not depend on a file-level import.
        import inspect

        # Use the real signature instead of __code__.co_varnames: co_varnames also
        # contains local variable names (letting invalid keys through), and for a
        # functools.wraps-decorated forward it describes the wrapper, not the model.
        accepted = inspect.signature(self.lm_model.forward).parameters
        filtered = {key: value for key, value in kwargs.items() if key in accepted}
        return self.lm_model.forward(**filtered)[0]
45 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/bert.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from transformers import BertModel
18 | 
19 | from nemo.collections.nlp.modules.common.bert_module import BertModule
20 | from nemo.core.classes import typecheck
21 | 
22 | __all__ = ['BertEncoder']
23 | 
24 | 
class BertEncoder(BertModel, BertModule):
    """
    NeMo-facing wrapper around the Huggingface transformers ``BertModel``.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids):
        # Delegate to the inherited forward and keep only the first element of its output.
        outputs = super().forward(
            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )
        return outputs[0]
34 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/distilbert.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from transformers import DistilBertModel
18 | 
19 | from nemo.collections.nlp.modules.common.bert_module import BertModule
20 | from nemo.core.classes import typecheck
21 | 
22 | __all__ = ['DistilBertEncoder']
23 | 
24 | 
class DistilBertEncoder(DistilBertModel, BertModule):
    """
    NeMo-facing wrapper around the Huggingface transformers ``DistilBertModel``.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids=None):
        # Unlike most of the other Bert models, DistilBert does not use
        # token_type_ids, so the argument is accepted but never forwarded.
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]
35 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/huggingface/roberta.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The Google AI Language Team Authors and
 2 | # The HuggingFace Inc. team.
 3 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | from transformers import RobertaModel
19 | 
20 | from nemo.collections.nlp.modules.common.bert_module import BertModule
21 | from nemo.core.classes import typecheck
22 | 
23 | __all__ = ['RobertaEncoder']
24 | 
25 | 
class RobertaEncoder(RobertaModel, BertModule):
    """
    NeMo-facing wrapper around the Huggingface transformers ``RobertaModel``.
    """

    @typecheck()
    def forward(self, input_ids, token_type_ids, attention_mask):
        # token_type_ids is part of the signature but is not passed on to the
        # inherited forward; only the first element of its output is returned.
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]
35 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.modules.common.megatron.megatron_bert import MegatronBertEncoder
16 | from nemo.collections.nlp.modules.common.megatron.megatron_utils import (
17 |     get_megatron_checkpoint,
18 |     get_megatron_lm_models_list,
19 | )
20 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/modules/common/transformer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.nlp.modules.common.transformer.transformer_decoders import *
16 | from nemo.collections.nlp.modules.common.transformer.transformer_encoders import *
17 | from nemo.collections.nlp.modules.common.transformer.transformer_generators import *
18 | from nemo.collections.nlp.modules.common.transformer.transformer_modules import *
19 | 


--------------------------------------------------------------------------------
/nemo/collections/nlp/parts/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.collections.nlp.parts.utils_funcs import list2str, tensor2list
17 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.collections.tts.data
16 | import nemo.collections.tts.helpers
17 | import nemo.collections.tts.models
18 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.collections.tts.data.datalayers
16 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/helpers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.collections.tts.helpers.helpers
16 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.collections.tts.losses.tacotron2loss
16 | import nemo.collections.tts.losses.waveglowloss
17 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/losses/waveglowloss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import torch
15 | 
16 | from nemo.core.classes import Loss, typecheck
17 | from nemo.core.neural_types.elements import LossType, NormalDistributionSamplesType, VoidType
18 | from nemo.core.neural_types.neural_type import NeuralType
19 | 
20 | 
class WaveGlowLoss(Loss):
    """Loss module for WaveGlow.

    The value returned by :meth:`forward` is::

        (sum(z * z) / (2 * sigma ** 2) - sum_i sum(log_s_list[i]) - sum_i log_det_W_list[i])
            / (z.size(0) * z.size(1) * z.size(2))
    """

    @property
    def input_types(self):
        """Neural types of the keyword arguments accepted by :meth:`forward`."""
        return {
            "z": NeuralType(('B', 'flowgroup', 'T'), NormalDistributionSamplesType()),
            "log_s_list": NeuralType(('B', 'flowgroup', 'T'), VoidType()),  # TODO: Figure out a good typing
            "log_det_W_list": NeuralType(elements_type=VoidType()),  # TODO: Figure out a good typing
            "sigma": NeuralType(optional=True),
        }

    @property
    def output_types(self):
        """Neural type of the single value returned by :meth:`forward`."""
        return {
            "loss": NeuralType(elements_type=LossType()),
        }

    @typecheck()
    def forward(self, *, z, log_s_list, log_det_W_list, sigma=1.0):
        """Compute the WaveGlow loss.

        Args:
            z: Tensor with at least three dimensions; the loss is divided by the
                product of its first three sizes.
            log_s_list: Iterable of tensors; every element is summed into the loss.
            log_det_W_list: Indexable sequence with at least ``len(log_s_list)``
                entries; entry ``i`` is added for every element ``i`` of ``log_s_list``.
            sigma: Divisor scale of the ``z`` term (default ``1.0``).

        Returns:
            The loss value described in the class docstring.

        Raises:
            IndexError: If ``log_det_W_list`` is shorter than ``log_s_list``.
        """
        # Start both accumulators from zero so an empty ``log_s_list`` contributes nothing
        # instead of leaving the totals undefined.
        log_s_total = 0
        log_det_W_total = 0
        for i, log_s in enumerate(log_s_list):
            log_s_total = log_s_total + torch.sum(log_s)
            # Deliberately out-of-place: an in-place ``+=`` on a total that aliases
            # ``log_det_W_list[0]`` would overwrite the caller's tensor.
            log_det_W_total = log_det_W_total + log_det_W_list[i]

        loss = torch.sum(z * z) / (2 * sigma * sigma) - log_s_total - log_det_W_total
        return loss / (z.size(0) * z.size(1) * z.size(2))
52 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.tts.models.glow_tts import GlowTTSModel
16 | from nemo.collections.tts.models.squeezewave import SqueezeWaveModel
17 | from nemo.collections.tts.models.tacotron2 import Tacotron2Model
18 | from nemo.collections.tts.models.waveglow import WaveGlowModel
19 | 
20 | __all__ = ["GlowTTSModel", "SqueezeWaveModel", "Tacotron2Model", "WaveGlowModel"]
21 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.collections.tts.modules.denoiser import SqueezeWaveDenoiser
16 | from nemo.collections.tts.modules.glow_tts import GlowTTSModule
17 | from nemo.collections.tts.modules.squeezewave import SqueezeWaveModule
18 | from nemo.collections.tts.modules.tacotron2 import Decoder as Taco2Decoder
19 | from nemo.collections.tts.modules.tacotron2 import Encoder as Taco2Encoder
20 | from nemo.collections.tts.modules.tacotron2 import Postnet as Taco2Postnet
21 | from nemo.collections.tts.modules.waveglow import WaveGlowModule
22 | 


--------------------------------------------------------------------------------
/nemo/collections/tts/modules/denoiser.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | from nemo.collections.asr.parts.features import STFTExactPad
18 | from nemo.collections.tts.modules.squeezewave import OperationMode
19 | 
20 | 
class SqueezeWaveDenoiser(torch.nn.Module):
    """Subtracts a pre-computed model bias spectrum from audio.

    At construction time the wrapped model converts an all-zero spectrogram to audio
    (with ``sigma=0.0``); the first frame of that audio's STFT output is stored as
    ``bias_spect``. ``forward`` subtracts a scaled copy of it from the input's
    STFT output.
    """

    def __init__(self, model, n_mel=80, filter_length=1024, hop_length=256, win_length=1024, window='hann'):
        """
        Args:
            model: Object exposing ``squeezewave``, ``device``, ``mode`` and
                ``convert_spectrogram_to_audio(spect=..., sigma=...)``.
            n_mel: Size of the second dimension of the all-zero probe spectrogram.
            filter_length: Passed through to ``STFTExactPad``.
            hop_length: Passed through to ``STFTExactPad``.
            win_length: Passed through to ``STFTExactPad``.
            window: Passed through to ``STFTExactPad``.
        """
        super().__init__()
        assert hasattr(model, 'squeezewave')

        stft = STFTExactPad(
            filter_length=filter_length, hop_length=hop_length, win_length=win_length, window=window,
        )
        self.stft = stft.to(model.device)

        with torch.no_grad():
            # Probe the model with an all-zero spectrogram of 88 frames.
            zero_spect = torch.zeros((1, n_mel, 88)).to(model.device)
            bias_audio = model.convert_spectrogram_to_audio(spect=zero_spect, sigma=0.0)
            bias_transformed, _ = self.stft.transform(bias_audio)
            # Keep only the first frame, restoring the dropped axis at position 2.
            self.bias_spect = bias_transformed[:, :, 0].unsqueeze(2)

        # `model.convert_spectrogram_to_audio` switches the mode to infer, so put both
        # the model and its squeezewave module back into validation mode.
        validation = OperationMode.validation
        model.mode = validation
        model.squeezewave.mode = validation

    def forward(self, audio, strength=0.1):
        """Return ``audio`` with ``strength`` times the stored bias spectrum removed.

        Args:
            audio: Input handed to ``self.stft.transform``.
            strength: Multiplier applied to ``bias_spect`` before subtraction.

        Returns:
            The result of ``self.stft.inverse`` on the clamped (non-negative)
            difference and the unchanged second output of ``transform``.
        """
        spect, angles = self.stft.transform(audio)
        # Values that would go negative after the subtraction are clamped to zero.
        cleaned = torch.clamp(spect - self.bias_spect * strength, 0.0)
        return self.stft.inverse(cleaned, angles)
46 | 


--------------------------------------------------------------------------------
/nemo/constants.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Environment-variable names (per the NEMO_ENV_VARNAME_ prefix), kept in one place.
NEMO_ENV_VARNAME_ENABLE_COLORING = "NEMO_ENABLE_COLORING"
NEMO_ENV_VARNAME_REDIRECT_LOGS_TO_STDERR = "NEMO_REDIRECT_LOGS_TO_STDERR"
NEMO_ENV_VARNAME_TESTING = "NEMO_TESTING"  # Set to True to enable nemo.utils.logging's debug mode
NEMO_ENV_VARNAME_VERSION = "NEMO_EXPM_VERSION"  # Used for nemo.utils.exp_manager versioning
19 | 


--------------------------------------------------------------------------------
/nemo/core/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nemo.core.neural_types
16 | from nemo.core.classes import *
17 | 


--------------------------------------------------------------------------------
/nemo/core/classes/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.core.classes.common import FileIO, Model, Serialization, Typing, is_typecheck_enabled, typecheck
17 | from nemo.core.classes.dataset import Dataset, IterableDataset
18 | from nemo.core.classes.exportable import Exportable, ExportFormat
19 | from nemo.core.classes.loss import Loss
20 | from nemo.core.classes.modelPT import ModelPT
21 | from nemo.core.classes.module import NeuralModule
22 | 


--------------------------------------------------------------------------------
/nemo/core/classes/loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | from nemo.core.classes.common import Serialization, Typing
18 | 
19 | __all__ = ['Loss']
20 | 
21 | 
class Loss(torch.nn.modules.loss._Loss, Typing, Serialization):
    """Base class for custom losses: derive from it and implement the loss."""

    def __init__(self, **kwargs):
        # Keyword arguments are passed, unchanged, to the next initializer in the MRO.
        super().__init__(**kwargs)
27 | 


--------------------------------------------------------------------------------
/nemo/core/classes/module.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from torch.nn import Module
16 | 
17 | from nemo.core.classes.common import FileIO, Serialization, Typing
18 | 
19 | __all__ = ['NeuralModule']
20 | 
21 | 
class NeuralModule(Module, Typing, Serialization, FileIO):
    """
    Common interface shared by all PyTorch Neural Modules.
    """

    @property
    def num_weights(self):
        """Total element count over all parameters that have ``requires_grad`` set."""
        trainable = 0
        for param in self.parameters():
            if param.requires_grad:
                trainable += param.numel()
        return trainable

    def input_example(self):
        """
        Hook for supplying sample input; override it if random inputs won't work.
        Returns:
            A tuple sample of valid input data when overridden; this base
            implementation returns None.
        """
        return None
39 | 


--------------------------------------------------------------------------------
/nemo/core/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.core.config.base_config import Config
16 | from nemo.core.config.optimizers import (
17 |     AdadeltaParams,
18 |     AdagradParams,
19 |     AdamaxParams,
20 |     AdamParams,
21 |     AdamWParams,
22 |     NovogradParams,
23 |     OptimizerParams,
24 |     RMSpropParams,
25 |     RpropParams,
26 |     SGDParams,
27 |     get_optimizer_config,
28 |     register_optimizer_params,
29 | )
30 | from nemo.core.config.pytorch import DataLoaderConfig
31 | from nemo.core.config.pytorch_lightning import TrainerConfig
32 | from nemo.core.config.schedulers import (
33 |     CosineAnnealingParams,
34 |     InverseSquareRootAnnealingParams,
35 |     PolynomialDecayAnnealingParams,
36 |     PolynomialHoldDecayAnnealingParams,
37 |     SchedulerParams,
38 |     SquareAnnealingParams,
39 |     SquareRootAnnealingParams,
40 |     WarmupAnnealingParams,
41 |     WarmupHoldSchedulerParams,
42 |     WarmupSchedulerParams,
43 |     get_scheduler_config,
44 |     register_scheduler_params,
45 | )
46 | from nemo.core.config.set_config import hydra_runner, set_config
47 | 


--------------------------------------------------------------------------------
/nemo/core/config/base_config.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import Optional
17 | 
18 | __all__ = ['Config']
19 | 
20 | 
@dataclass
class Config:
    """
    Base NeMo configuration dataclass.

    It declares a single, optional field; it is a plain ``@dataclass`` and does not
    itself enforce abstractness.

    Args:
        name: name of the module/dataset/loss/model object (used in serialization, DEFAULT: None)
    """

    # Optional object name; stays None when the caller does not provide one.
    name: Optional[str] = None
31 | 


--------------------------------------------------------------------------------
/nemo/core/config/pytorch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import Any, Optional
17 | 
18 | from omegaconf import MISSING
19 | 
20 | __all__ = ['DataLoaderConfig']
21 | 
22 | 
@dataclass
class DataLoaderConfig:
    """
    Configuration of PyTorch DataLoader.

    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    .. note::
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
    """

    # The only field without a concrete default: it is set to omegaconf's MISSING marker.
    # NOTE(review): presumably this makes the value mandatory at config-resolution time — confirm.
    batch_size: int = MISSING
    shuffle: bool = False
    # Fields typed Optional[Any] accept arbitrary objects and default to None.
    sampler: Optional[Any] = None
    batch_sampler: Optional[Any] = None
    num_workers: int = 0
    collate_fn: Optional[Any] = None
    pin_memory: bool = False
    drop_last: bool = False
    timeout: int = 0
    worker_init_fn: Optional[Any] = None
    multiprocessing_context: Optional[Any] = None
46 | 


--------------------------------------------------------------------------------
/nemo/core/config/pytorch_lightning.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import Any, Dict, List, Optional
17 | 
18 | __all__ = ['TrainerConfig']
19 | 
20 | 
@dataclass
class TrainerConfig:
    """
    Configuration of PyTorch Lightning Trainer.

    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    .. warning::
        Picked just few params of the PTL trainer for now. This needs to be discussed.

    .. note::
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch-lightning.readthedocs.io/en/latest/trainer.html#
    """

    # Every field has a default, so TrainerConfig() can be created without arguments.
    gradient_clip_val: float = 0
    process_position: int = 0
    num_nodes: int = 1
    num_processes: int = 1
    gpus: Optional[int] = None
    auto_select_gpus: bool = False
    log_gpu_memory: Optional[str] = None
    progress_bar_refresh_rate: int = 1
    check_val_every_n_epoch: int = 1
    fast_dev_run: bool = False
    max_epochs: int = 1000
    min_epochs: int = 1
    distributed_backend: Optional[str] = None
    max_steps: Optional[int] = None
    accumulate_grad_batches: int = 1
    # The default is the capital letter "O" followed by the digit zero.
    amp_level: str = "O0"
52 | 


--------------------------------------------------------------------------------
/nemo/core/neural_types/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.core.neural_types.axes import *
17 | from nemo.core.neural_types.comparison import *
18 | from nemo.core.neural_types.elements import *
19 | from nemo.core.neural_types.neural_type import *
20 | 


--------------------------------------------------------------------------------
/nemo/core/neural_types/comparison.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from enum import Enum
16 | 
17 | __all__ = ['NeuralTypeComparisonResult']
18 | 
19 | 
class NeuralTypeComparisonResult(Enum):
    """The result of comparing two neural type objects for compatibility.

    The per-member comments below describe the outcome of ``A.compare_to(B)``.
    """

    SAME = 0
    LESS = 1  # A is B (NOTE(review): presumably "A is a kind of B" — confirm)
    GREATER = 2  # B is A (NOTE(review): presumably "B is a kind of A" — confirm)
    DIM_INCOMPATIBLE = 3  # Resize connector might fix incompatibility
    TRANSPOSE_SAME = 4  # A transpose and/or converting between lists and tensors will make them same
    CONTAINER_SIZE_MISMATCH = 5  # A and B contain different number of elements
    INCOMPATIBLE = 6  # A and B are incompatible
    SAME_TYPE_INCOMPATIBLE_PARAMS = 7  # A and B are of the same type but parametrized differently
    UNCHECKED = 8  # type comparison wasn't done
33 | 


--------------------------------------------------------------------------------
/nemo/core/optim/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.core.optim.lr_scheduler import (
16 |     CosineAnnealing,
17 |     InverseSquareRootAnnealing,
18 |     PolynomialDecayAnnealing,
19 |     PolynomialHoldDecayAnnealing,
20 |     SquareAnnealing,
21 |     SquareRootAnnealing,
22 |     WarmupAnnealing,
23 |     WarmupHoldPolicy,
24 |     WarmupPolicy,
25 |     prepare_lr_scheduler,
26 | )
27 | from nemo.core.optim.novograd import Novograd
28 | from nemo.core.optim.optimizers import get_optimizer, parse_optimizer_args, register_optimizer
29 | 


--------------------------------------------------------------------------------
/nemo/package_info.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
# Individual version components.
MAJOR, MINOR, PATCH = 1, 0, 0
PRE_RELEASE = 'b1'

# Layout of the tuple: (major, minor, patch, pre-release)
VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)

# Dotted "major.minor.patch" string; the full version appends the pre-release tag to it.
__shortversion__ = '.'.join(str(part) for part in VERSION[:3])
__version__ = __shortversion__ + ''.join(VERSION[3:])

# Package metadata.
__package_name__ = 'nemo_toolkit'
__contact_names__ = 'NVIDIA'
__contact_emails__ = 'nemo-toolkit@nvidia.com'
__homepage__ = 'https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/'
__repository_url__ = 'https://github.com/nvidia/nemo'
__download_url__ = 'https://github.com/NVIDIA/NeMo/releases'
__description__ = 'NeMo - a toolkit for Conversational AI'
__license__ = 'Apache2'
__keywords__ = 'deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, tts, speech, language'
36 | 


--------------------------------------------------------------------------------
/nemo/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.utils.nemo_logging import Logger as _Logger
17 | from nemo.utils.nemo_logging import LogMode as logging_mode
18 | from nemo.utils.lightning_logger_patch import add_memory_handlers_to_pl_logger
19 | 
# Package-level logger instance, created at import time; other modules obtain it with
# `from nemo.utils import logging`. Inside this package the name refers to this object,
# not to the standard-library `logging` module.
logging = _Logger()
# Runs once, at import time.
# NOTE(review): presumably attaches NeMo's memory handlers to the PyTorch Lightning
# logger, judging by the function name — confirm in nemo/utils/lightning_logger_patch.py.
add_memory_handlers_to_pl_logger()
22 | 


--------------------------------------------------------------------------------
/nemo/utils/decorators/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from nemo.utils.decorators.deprecated import deprecated
17 | from nemo.utils.decorators.experimental import experimental
18 | from nemo.utils.decorators.port_docs import add_port_docs
19 | 


--------------------------------------------------------------------------------
/nemo/utils/decorators/deprecated.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | __all__ = [
17 |     'deprecated',
18 | ]
19 | 
20 | import functools
21 | 
22 | import wrapt
23 | 
24 | from nemo.utils import logging
25 | 
26 | # Remember which deprecation warnings have been printed already.
27 | _PRINTED_WARNING = {}
28 | 
29 | 
def deprecated(wrapped=None, version=None, explanation=None):
    """ Decorator used for indicating that a function is deprecated and going to be removed.
    Tracks down which functions printed the warning and will print it only once per function.

    Can be applied bare (``@deprecated``) or with keyword arguments
    (``@deprecated(version=..., explanation=...)``).

    Args:
      wrapped: Callable being decorated. When None (decorator used with keyword arguments only),
        a partial of this decorator carrying ``version`` and ``explanation`` is returned instead.
      version: Version in which the function will be removed (optional)
      explanation: Additional explanation (optional), e.g. use method ``blabla instead``.

    Returns:
      The callable wrapped with ``wrapt.decorator``, or a ``functools.partial`` of this
      decorator when ``wrapped`` is None.
    """

    if wrapped is None:
        return functools.partial(deprecated, version=version, explanation=explanation)

    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        """
        Logs the deprecation warning (only once per decorated callable) and then
        calls the wrapped callable with the original arguments.
        """
        name = wrapped.__name__

        # Key the "already warned" registry on module + qualified name. The bare __name__ is
        # shared by unrelated callables (e.g. two ``forward`` methods of different classes),
        # which would make the first warning silence all the others.
        key = "{}.{}".format(getattr(wrapped, "__module__", None), getattr(wrapped, "__qualname__", name))

        # Check if we already warned about that function.
        if key not in _PRINTED_WARNING:
            # Remember it so we won't print it again.
            _PRINTED_WARNING[key] = True

            # Prepare the warning message.
            msg = "Function ``{}`` is deprecated.".format(name)

            # Optionally, add version and alternative.
            if version is not None:
                msg = msg + " It is going to be removed in "
                msg = msg + "the {} version.".format(version)

            if explanation is not None:
                msg = msg + " " + explanation

            # Display the deprecated warning.
            logging.warning(msg)

        # Call the function.
        return wrapped(*args, **kwargs)

    return wrapper(wrapped)
73 | 


--------------------------------------------------------------------------------
/nemo/utils/decorators/experimental.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | __all__ = ['experimental']
17 | 
18 | from nemo.utils import logging
19 | 
20 | 
def experimental(cls):
    """ Decorator flagging a module (class) as experimental.

    A warning is logged once, at decoration time, and the decorated object is
    handed back unchanged. Use it to mark experimental or research modules.
    """
    # Emit the notice right away - decoration happens when the class is defined.
    logging.warning(
        f'Module {cls} is experimental, not ready for production and is not fully supported. Use at your own risk.'
    )
    return cls
34 | 


--------------------------------------------------------------------------------
/nemo/utils/exceptions.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
class NeMoBaseException(Exception):
    """ Root of the NeMo exception hierarchy.

    All exceptions created in NeMo should inherit from this class.
    """
20 | 


--------------------------------------------------------------------------------
/nemo/utils/formatters/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/nemo/utils/formatters/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import sys
17 | 
18 | from nemo.constants import NEMO_ENV_VARNAME_ENABLE_COLORING
19 | from nemo.utils.env_var_parsing import get_envbool
20 | 
21 | __all__ = ["check_color_support", "to_unicode"]
22 | 
23 | 
def check_color_support():
    """
    Tells whether colored output has been requested and can be used.

    Colors are enabled only when the platform name does not start with "win" and the
    environment variable named by NEMO_ENV_VARNAME_ENABLE_COLORING parses as true.

    Returns:
        bool: True when coloring is enabled, False otherwise (always an explicit bool,
        never an implicit None).
    """
    # Windows platforms never get colors, regardless of the environment variable.
    if sys.platform.lower().startswith("win"):
        return False

    # Colors can be forced with an env variable
    return bool(get_envbool(NEMO_ENV_VARNAME_ENABLE_COLORING, False))
28 | 
29 | 
def to_unicode(value):
    """
    Returns the text form of ``value``.

    ``str`` and ``None`` pass through untouched. ``bytes`` are decoded as utf8; when
    the bytes are not valid utf8 their ``repr`` is returned instead. Any other type
    is rejected with a ``TypeError``.
    """
    # Already text (or nothing at all) - nothing to convert.
    if value is None or isinstance(value, str):
        return value

    # Only byte strings can be decoded.
    if not isinstance(value, bytes):
        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))

    try:
        decoded = value.decode("utf-8")
    except UnicodeDecodeError:
        # Undecodable payload: fall back to a printable representation.
        return repr(value)
    return decoded
47 | 


--------------------------------------------------------------------------------
/nemo/utils/get_rank.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from nemo.utils.env_var_parsing import get_envint
16 | 
17 | 
def is_global_rank_zero():
    """ Helper function to determine if the current process is global_rank 0 (the main process)

    Resolution order:
      1. ``RANK`` - when set, it decides on its own (including the value 0).
      2. Otherwise the node rank is taken from ``SLURM_NODEID`` when ``SLURM_NTASKS`` is positive,
         else from ``GROUP_RANK`` / ``NODE_RANK`` (``NODE_RANK`` wins when both are set), and the
         process is rank zero when both the node rank and ``LOCAL_RANK`` are 0.

    Returns:
        bool: True when this process is considered the global rank-zero process.
    """
    # Try to get the pytorch RANK env var
    # RANK is set by torch.distributed.launch
    rank = get_envint("RANK", None)
    # Compare against None explicitly: a plain truthiness test would treat RANK=0 as "not set".
    if rank is not None:
        return rank == 0

    # If not set by pytorch, we need to determine node_rank
    def get_node_rank():
        # Use an equivalent of pytorch lightning's determine_ddp_node_rank()
        node_rank = 0
        # First check if running on a slurm cluster
        # TODO: This check could probably be better
        num_slurm_tasks = get_envint("SLURM_NTASKS", 0)
        if num_slurm_tasks > 0:
            node_rank = get_envint("SLURM_NODEID", 0)
        else:
            node_rank_env = get_envint("NODE_RANK", None)
            group_rank = get_envint("GROUP_RANK", None)
            if group_rank is not None:
                node_rank = group_rank
            # Take from NODE_RANK whenever available
            if node_rank_env is not None:
                node_rank = node_rank_env
        return node_rank

    node_rank = get_node_rank()
    local_rank = get_envint("LOCAL_RANK", 0)
    return node_rank == 0 and local_rank == 0
49 | 


--------------------------------------------------------------------------------
/nemo/utils/lightning_logger_patch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging as _logging
16 | from logging.handlers import MemoryHandler
17 | 
18 | import pytorch_lightning as pl
19 | 
# Registry of the handlers this module attaches to pytorch_lightning's logger, keyed by role:
# "memory_err" / "memory_all" (buffers) and "file" / "file_err" (file handlers).
HANDLERS = {}
# NOTE(review): not referenced by either function in this module - confirm whether it is still needed.
PATCHED = False
22 | 
23 | 
def add_memory_handlers_to_pl_logger():
    """
    Registers two MemoryHandlers (message buffers) on pytorch_lightning's logger: "memory_err", which
    only accepts records above INFO, and "memory_all", which accepts everything. Called from
    nemo.utils.__init__.py; add_filehandlers_to_pl_logger later flushes both buffers to files.
    Does nothing when HANDLERS already holds any handler.
    """
    # Already patched (or file handlers already installed) - leave the logger alone.
    if HANDLERS:
        return

    err_buffer = MemoryHandler(-1)
    err_buffer.addFilter(lambda record: record.levelno > _logging.INFO)
    all_buffer = MemoryHandler(-1)

    HANDLERS["memory_err"] = err_buffer
    HANDLERS["memory_all"] = all_buffer

    for buffer_handler in (err_buffer, all_buffer):
        pl._logger.addHandler(buffer_handler)
36 | 
37 | 
def add_filehandlers_to_pl_logger(all_log_file, err_log_file):
    """
    Adds two filehandlers to pytorch_lightning's logger. Called in nemo.utils.exp_manager(). The first filehandler
    logs all messages to all_log_file while the second filehandler logs all WARNING and higher messages to err_log_file.
    If "memory_err" and "memory_all" exist in HANDLERS, then those buffers are flushed to err_log_file and all_log_file
    respectively, closed, and detached from pytorch_lightning's logger.

    Args:
        all_log_file: Path of the file receiving every record.
        err_log_file: Path of the file receiving only records above INFO.
    """
    HANDLERS["file"] = _logging.FileHandler(all_log_file)
    pl._logger.addHandler(HANDLERS["file"])
    HANDLERS["file_err"] = _logging.FileHandler(err_log_file)
    HANDLERS["file_err"].addFilter(lambda record: record.levelno > _logging.INFO)
    pl._logger.addHandler(HANDLERS["file_err"])

    # Drain each memory buffer into its matching file handler, in the same order as before.
    for memory_key, file_key in (("memory_all", "file"), ("memory_err", "file_err")):
        memory_handler = HANDLERS.get(memory_key, None)
        if not memory_handler:
            continue
        memory_handler.setTarget(HANDLERS[file_key])
        # close() flushes the buffered records to the target before shutting the handler down.
        memory_handler.close()
        # Closing does not detach the handler from the logger; without this it would stay
        # attached and keep accumulating every later record in memory.
        pl._logger.removeHandler(memory_handler)
        del HANDLERS[memory_key]
59 | 


--------------------------------------------------------------------------------
/nemo/utils/metaclasses.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import threading
17 | 
18 | 
class Singleton(type):
    """ Implementation of a generic, thread-safe singleton meta-class.
        Can be used as meta-class (``class Foo(metaclass=Singleton)``), i.e. every call to such a
        class returns the same instance, which is created on the first call. Constructor arguments
        passed to later calls are ignored.
    """

    # Dictionary of instances - one per class.
    __instances = {}
    # Lock used for accessing the instances. It is shared by all singleton classes, so it has to be
    # re-entrant: a singleton whose constructor creates another singleton would otherwise deadlock
    # on the lock its own construction is still holding.
    __lock = threading.RLock()

    def __call__(cls, *args, **kwargs):
        """ Returns singleton instance. A thread safe implementation.

        Args:
            *args, **kwargs: Forwarded to the class constructor on the first call only.

        Returns:
            The one instance registered for ``cls``.
        """
        if cls not in cls.__instances:
            # Enter critical section.
            with cls.__lock:
                # Check once again - another thread may have created the instance meanwhile.
                if cls not in cls.__instances:
                    # Create a new object instance - one per class.
                    cls.__instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        # Return the instance.
        return cls.__instances[cls]
40 | 


--------------------------------------------------------------------------------
/reinstall.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | 
 4 | PIP=pip
 5 | 
 6 | echo 'Uninstalling stuff'
 7 | ${PIP} uninstall -y nemo_toolkit
 8 | 
 9 | # Kept for legacy purposes
10 | ${PIP} uninstall -y nemo_asr
11 | ${PIP} uninstall -y nemo_nlp
12 | ${PIP} uninstall -y nemo_tts
13 | ${PIP} uninstall -y nemo_simple_gan
14 | 
15 | ${PIP} install -U setuptools
16 | 
17 | for f in $(ls requirements/*.txt); do ${PIP} install ${PIP_FLAGS}--disable-pip-version-check --no-cache-dir -r $f; done 
18 | 
19 | echo 'Installing stuff'
20 | ${PIP} install -e ".[all]"
21 | 
22 | echo 'All done!'
23 | 


--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.18.2
 2 | onnx>=1.7.0
 3 | pytorch-lightning==0.9.0
 4 | python-dateutil
 5 | torch
 6 | wget
 7 | wrapt
 8 | ruamel.yaml
 9 | scikit-learn
10 | omegaconf==2.0.1rc12
11 | hydra-core==1.0.0rc4
12 | transformers>=3.1.0
13 | 


--------------------------------------------------------------------------------
/requirements/requirements_asr.txt:
--------------------------------------------------------------------------------
 1 | braceexpand
 2 | editdistance
 3 | frozendict
 4 | inflect
 5 | kaldi-io
 6 | librosa
 7 | marshmallow
 8 | packaging
 9 | num2words
10 | ruamel.yaml
11 | soundfile
12 | sox
13 | torch-stft
14 | unidecode
15 | webdataset
16 | kaldi-python-io
17 | scipy
18 | pandas
19 | 


--------------------------------------------------------------------------------
/requirements/requirements_cv.txt:
--------------------------------------------------------------------------------
1 | pillow
2 | torchvision
3 | 


--------------------------------------------------------------------------------
/requirements/requirements_docs.txt:
--------------------------------------------------------------------------------
1 | latexcodec
2 | sphinx_rtd_theme
3 | sphinxcontrib-bibtex
4 | wrapt
5 | 


--------------------------------------------------------------------------------
/requirements/requirements_nlp.txt:
--------------------------------------------------------------------------------
 1 | boto3
 2 | h5py
 3 | matplotlib>=3.3.2
 4 | sentencepiece
 5 | torchtext
 6 | unidecode
 7 | youtokentome
 8 | numpy
 9 | tqdm>=4.41.0
10 | rapidfuzz
11 | gdown
12 | megatron-lm>=1.1.4
13 | inflect
14 | 


--------------------------------------------------------------------------------
/requirements/requirements_simple_gan.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | 


--------------------------------------------------------------------------------
/requirements/requirements_test.txt:
--------------------------------------------------------------------------------
 1 | black==19.10b0
 2 | isort[requirements] < 5
 3 | parameterized
 4 | pytest
 5 | pytest-runner
 6 | ruamel.yaml
 7 | sphinx
 8 | sphinxcontrib-bibtex
 9 | wrapt
10 | wget
11 | wandb
12 | 


--------------------------------------------------------------------------------
/requirements/requirements_tts.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | pypinyin
3 | attrdict
4 | 


--------------------------------------------------------------------------------
/scripts/convasr_to_onnx.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import argparse
15 | 
16 | from nemo.collections.asr.models import EncDecClassificationModel, EncDecCTCModel, EncDecSpeakerLabelModel
17 | from nemo.utils import logging
18 | 
19 | 
def get_parser():
    """Builds the command-line parser of the .nemo -> ONNX conversion script.

    Three mandatory path options (``--nemo_file``, ``--onnx_encoder``, ``--onnx_decoder``) plus the
    optional ``--model_type`` selector, which defaults to ``asr``.
    """
    parser = argparse.ArgumentParser(description="Convert .nemo file to encoder decoder onnx files")

    # The mandatory path options only differ by flag name and help text.
    required_paths = (
        ("--nemo_file", "Path to .nemo file"),
        ("--onnx_encoder", "Path to the onnx encoder output."),
        ("--onnx_decoder", "Path to the onnx decoder output."),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, default=None, type=str, required=True, help=help_text)

    parser.add_argument(
        "--model_type",
        default='asr',
        type=str,
        choices=['asr', 'speech_label', 'speaker'],
        help="Type of decoder used by the model.",
    )
    return parser
39 | 
40 | 
def main(nemo_file, onnx_encoder, onnx_decoder, model_type='asr'):
    """Restores a model from a .nemo file and exports its encoder and decoder.

    Args:
        nemo_file: Path to the .nemo file to restore.
        onnx_encoder: Output path passed to ``model.encoder.export``.
        onnx_decoder: Output path passed to ``model.decoder.export``.
        model_type: One of ``asr``, ``speech_label`` or ``speaker``; selects the model class.

    Raises:
        NameError: If ``model_type`` is none of the supported names.
    """
    # (model type, log line, model class) for every supported kind of model.
    supported = (
        ('asr', "Preparing encoder decoder for ASR model", EncDecCTCModel),
        ('speech_label', "Preparing encoder decoder for Speech Label Classification model", EncDecClassificationModel),
        ('speaker', "Preparing encoder decoder for Speaker Recognition model", EncDecSpeakerLabelModel),
    )

    for kind, banner, model_cls in supported:
        if model_type == kind:
            logging.info(banner)
            model = model_cls.restore_from(nemo_file)
            break
    else:
        # No entry matched the requested type.
        raise NameError("Available model names are asr, speech_label and speaker ")

    logging.info("Writing onnx encoder and decoder onnx files")
    model.encoder.export(onnx_encoder)
    model.decoder.export(onnx_decoder, onnx_opset_version=10)
    logging.info("succesfully ported onnx files")
61 | 
if __name__ == "__main__":
    # Script entry point: parse the command line and run the conversion.
    cli_args = get_parser().parse_args()
    main(cli_args.nemo_file, cli_args.onnx_encoder, cli_args.onnx_decoder, model_type=cli_args.model_type)
67 | 


--------------------------------------------------------------------------------
/scripts/freesound_download_resample/freesound_requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/MTG/freesound-python.git
2 | requests
3 | requests_oauthlib
4 | joblib
5 | librosa
6 | sox


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | [aliases]
16 | test=pytest
17 | 
18 | # durations=0 will display the execution time of all tests, sorted in descending order, starting from the slowest one.
19 | # -vv will also display tests with duration = 0.00s
20 | [tool:pytest]
21 | addopts = --verbose --pyargs --durations=0
22 | markers =
23 |     unit: marks unit test, i.e. testing a single, well isolated functionality (deselect with '-m "not unit"')
24 |     integration: marks test checking the elements when integrated into subsystems (deselect with '-m "not integration"')
25 |     system: marks test working at the highest integration level (deselect with '-m "not system"')
26 |     acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m "not acceptance"')
27 |     docs: mark tests related to documentation (deselect with '-m "not docs"')
28 |     skipduringci: marks tests that are skipped ci as they are addressed by Jenkins jobs but should be run to test user setups
29 |     pleasefixme: marks tests that are broken and need fixing
30 | 
31 | [isort]
32 | known_localfolder = nemo,tests
33 | sections = FUTURE,STDLIB,THIRDPARTY,LOCALFOLDER
34 | default_section = THIRDPARTY
35 | #TODO tests/unit/core/test_deploy_export.py gets screwed by isort
36 | skip = setup.py, docs/source/conf.py, nemo/utils/__init__.py, tests/unit/core/test_deploy_export.py
37 | 


--------------------------------------------------------------------------------
/tests/collections/asr/test_speaker_label_models.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from unittest import TestCase
16 | 
17 | import pytest
18 | from omegaconf import DictConfig
19 | 
20 | from nemo.collections.asr.models import EncDecSpeakerLabelModel
21 | 
22 | 
class EncDecSpeechLabelModelTest(TestCase):
    """Construction test for EncDecSpeakerLabelModel using a config assembled in code."""

    @pytest.mark.unit
    def test_constructor(self):
        """Builds the model from a small config and round-trips it through its config dict."""
        # Single convolutional block used by the encoder.
        jasper_block = {
            'filters': 512,
            'repeat': 1,
            'kernel': [1],
            'stride': [1],
            'dilation': [1],
            'dropout': 0.0,
            'residual': False,
            'separable': False,
        }

        # One entry per model component, in the order they are placed in the model config.
        sections = {
            'preprocessor': {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': {}},
            'encoder': {
                'cls': 'nemo.collections.asr.modules.ConvASREncoder',
                'params': {'feat_in': 64, 'activation': 'relu', 'conv_mask': True, 'jasper': [jasper_block]},
            },
            'decoder': {
                'cls': 'nemo.collections.asr.modules.SpeakerDecoder',
                'params': {'feat_in': 512, 'num_classes': 2, 'pool_mode': 'xvector', 'emb_sizes': [1024]},
            },
        }

        modelConfig = DictConfig({name: DictConfig(section) for name, section in sections.items()})
        speaker_model = EncDecSpeakerLabelModel(cfg=modelConfig)
        speaker_model.train()
        # TODO: make proper config and assert correct number of weights

        # Check to/from config_dict:
        exported_config = speaker_model.to_config_dict()
        rebuilt = EncDecSpeakerLabelModel.from_config_dict(exported_config)
        self.assertTrue(isinstance(rebuilt, EncDecSpeakerLabelModel))
64 | 


--------------------------------------------------------------------------------
/tests/collections/nlp/test_classification_report.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from unittest import TestCase
17 | 
18 | import pytest
19 | import torch
20 | from sklearn.metrics import precision_recall_fscore_support
21 | 
22 | from nemo.collections.nlp.metrics.classification_report import ClassificationReport
23 | 
24 | 
class ClassificationReportTests(TestCase):
    """Compares NeMo's ClassificationReport against sklearn's precision/recall/f1."""

    num_classes = 3
    label_ids = {'a': 0, 'b': 1, 'c': 2}

    @pytest.mark.unit
    def test_classification_report(self):
        """Rounded NeMo metrics must equal the rounded sklearn percentages for every averaging mode."""

        def as_rounded_percent(sklearn_metric):
            # sklearn reports fractions; scale to a percentage, round, and wrap as a tensor scalar.
            return torch.Tensor([round(sklearn_metric * 100)])[0]

        report = ClassificationReport(num_classes=self.num_classes, label_ids=self.label_ids)

        preds = torch.Tensor([0, 1, 1, 1, 2, 2, 0])
        labels = torch.Tensor([1, 0, 0, 1, 2, 1, 0])

        tp, fp, fn = report(preds, labels)

        for mode in ('macro', 'micro', 'weighted'):
            precision, recall, f1 = report.get_precision_recall_f1(tp, fn, fp, mode)
            pr_sklearn, recall_sklearn, f1_sklearn, _ = precision_recall_fscore_support(labels, preds, average=mode)

            self.assertEqual(torch.round(precision), as_rounded_percent(pr_sklearn), f'wrong precision for {mode}')
            self.assertEqual(torch.round(recall), as_rounded_percent(recall_sklearn), f'wrong recall for {mode}')
            self.assertEqual(torch.round(f1), as_rounded_percent(f1_sklearn), f'wrong f1 for {mode}')
49 | 


--------------------------------------------------------------------------------
/tests/collections/nlp/test_megatron.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | try:
16 |     import apex
17 | 
18 |     apex_available = True
19 | except Exception:
20 |     apex_available = False
21 | 
22 | import os
23 | import tempfile
24 | from unittest import TestCase
25 | 
26 | import onnx
27 | import pytest
28 | import torch
29 | 
30 | import nemo.collections.nlp as nemo_nlp
31 | 
32 | 
class TestMegatron(TestCase):
    """Tests for the Megatron language-model helpers exposed by nemo.collections.nlp.modules."""

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_list_pretrained_models(self):
        """The list of pretrained language models must not be empty."""
        pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
        self.assertTrue(len(pretrained_lm_models) > 0)

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_get_pretrained_bert_345m_uncased_model(self):
        """Loads megatron-bert-345m-uncased, exports it to ONNX and dumps the printable graph."""
        model_name = "megatron-bert-345m-uncased"
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
        if torch.cuda.is_available():
            model = model.cuda()

        assert isinstance(model, nemo_nlp.modules.MegatronBertEncoder)

        # Deliberately disabled branch, kept as in the original.
        if False:  #  apex_available:
            model = apex.amp.initialize(model, opt_level="O2")
        with tempfile.TemporaryDirectory() as tmpdir:
            # Generate filename in the temporary directory, so that the exported files are removed
            # with it instead of being left behind in the current working directory.
            tmp_file_name = os.path.join(tmpdir, model_name + ".onnx")
            # Test export.
            model.export(tmp_file_name, check_trace=False)
            modelX = onnx.load(tmp_file_name)
            with open(tmp_file_name + '.txt', 'w') as o:
                o.write('Model :\n\n{}'.format(onnx.helper.printable_graph(modelX.graph)))
60 | 


--------------------------------------------------------------------------------
/tests/collections/nlp/test_nlp_exportables.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | import tempfile
16 | 
17 | import pytest
18 | from omegaconf import DictConfig
19 | 
20 | from nemo.collections.nlp.modules.common import (
21 |     BertPretrainingTokenClassifier,
22 |     SequenceClassifier,
23 |     SequenceRegression,
24 |     SequenceTokenClassifier,
25 |     TokenClassifier,
26 | )
27 | 
28 | 
29 | def classifier_export(obj):
30 |     with tempfile.TemporaryDirectory() as tmpdir:
31 |         filename = os.path.join(tmpdir, obj.__class__.__name__ + '.onnx')
32 |         obj = obj.cuda()
33 |         obj.export(output=filename)
34 | 
35 | 
36 | class TestExportableClassifiers:
37 |     @pytest.mark.run_only_on('GPU')
38 |     @pytest.mark.unit
39 |     def test_token_classifier_export_to_onnx(self):
40 |         for num_layers in [1, 2, 4]:
41 |             classifier_export(TokenClassifier(hidden_size=256, num_layers=num_layers, num_classes=16))
42 | 
43 |     @pytest.mark.run_only_on('GPU')
44 |     @pytest.mark.unit
45 |     def test_bert_pretraining_export_to_onnx(self):
46 |         for num_layers in [1, 2, 4]:
47 |             classifier_export(TokenClassifier(hidden_size=256, num_layers=num_layers, num_classes=16))
48 | 
49 |     @pytest.mark.run_only_on('GPU')
50 |     @pytest.mark.unit
51 |     def test_sequence_token_classifier_export_to_onnx(self):
52 |         for num_layers in [1, 2, 4]:
53 |             classifier_export(
54 |                 SequenceTokenClassifier(hidden_size=256, num_slots=8, num_intents=8, num_layers=num_layers)
55 |             )
56 | 
57 |     @pytest.mark.run_only_on('GPU')
58 |     @pytest.mark.unit
59 |     def test_sequence_classifier_export_to_onnx(self):
60 |         for num_layers in [1, 2, 4]:
61 |             classifier_export(SequenceClassifier(hidden_size=256, num_classes=16, num_layers=num_layers))
62 | 
63 |     @pytest.mark.run_only_on('GPU')
64 |     @pytest.mark.unit
65 |     def test_sequence_regression_export_to_onnx(self):
66 |         for num_layers in [1, 2, 4]:
67 |             classifier_export(SequenceRegression(hidden_size=256, num_layers=num_layers))
68 | 


--------------------------------------------------------------------------------
/tests/collections/tts/test_waveglow.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import tempfile
17 | from unittest import TestCase
18 | 
19 | import pytest
20 | from omegaconf import DictConfig
21 | 
22 | from nemo.collections.tts.models import WaveGlowModel
23 | from nemo.collections.tts.modules import WaveGlowModule
24 | 
25 | wcfg = DictConfig(  # keyword arguments unpacked into WaveGlowModule(**wcfg) by the test below
26 |     {
27 |         "n_flows": 12,
28 |         "n_group": 8,
29 |         "n_mel_channels": 80,
30 |         "n_early_every": 4,
31 |         "n_early_size": 2,
32 |         "n_wn_channels": 512,
33 |         "n_wn_layers": 8,
34 |         "wn_kernel_size": 3,
35 |     }
36 | )
37 | 
38 | 
39 | class TestWaveGlow:
40 |     @pytest.mark.run_only_on('GPU')
41 |     @pytest.mark.unit
42 |     def test_export_to_onnx(self):
43 |         model = WaveGlowModule(**wcfg).cuda().half()
44 |         with tempfile.TemporaryDirectory() as tmpdir:
45 |             # Generate filename in the temporary directory.
46 |             tmp_file_name = os.path.join("waveglow.onnx")
47 |             # Test export.
48 |             model.export(tmp_file_name, check_trace=False)
49 | 


--------------------------------------------------------------------------------
/tests/test_data_dir.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from os.path import exists, join
17 | 
18 | import pytest
19 | 
20 | 
21 | class TestDataDir:
22 |     @pytest.mark.unit
23 |     def test_test_data_dir(self, test_data_dir):
24 |         """" Just a dummy tests showing how to use the test_data_dir fixture. """
25 |         # test_data_dir contains the absolute path to nemo -> tests/.data
26 |         assert exists(test_data_dir)
27 |         assert exists(join(test_data_dir, "test_data.tar.gz"))
28 | 


--------------------------------------------------------------------------------
/tools/speech_data_explorer/README.md:
--------------------------------------------------------------------------------
 1 | Speech Data Explorer
 2 | --------------------
 3 | 
 4 | [Dash](https://plotly.com/dash/)-based tool for interactive exploration of ASR/TTS datasets. 
 5 | 
 6 | Features:
 7 | - dataset's statistics (alphabet, vocabulary, duration-based histograms)
 8 | - navigation across dataset (sorting, filtering)
 9 | - inspection of individual utterances (waveform, spectrogram, audio player)
10 | 
11 | Install the dependencies listed in `requirements.txt` (for example, `pip install -r requirements.txt`), then run:
12 | ```
13 | python data_explorer.py path_to_manifest.json
14 | ```
15 | 
16 | ![Speech Data Explorer](screenshot.png)
17 | 


--------------------------------------------------------------------------------
/tools/speech_data_explorer/requirements.txt:
--------------------------------------------------------------------------------
1 | librosa
2 | dash
3 | dash_html_components
4 | dash_bootstrap_components
5 | plotly
6 | dash_core_components
7 | dash_table
8 | numpy
9 | 


--------------------------------------------------------------------------------
/tools/speech_data_explorer/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/tools/speech_data_explorer/screenshot.png


--------------------------------------------------------------------------------