├── .dockerignore ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── .gitignore ├── .readthedocs.yml ├── CONTRIBUTING.md ├── Dockerfile ├── Jenkinsfile ├── README.rst ├── docs ├── .nojekyll ├── Makefile ├── source │ ├── api-docs │ │ └── nemo.rst │ ├── asr │ │ ├── api.rst │ │ ├── asr_all.bib │ │ ├── ctc_asr.png │ │ ├── datasets.rst │ │ ├── intro.rst │ │ ├── jasper_layers.png │ │ ├── jasper_vertical.png │ │ ├── models.rst │ │ └── quartz_vertical.png │ ├── common │ │ └── intro.rst │ ├── conf.py │ ├── core.rst │ ├── cv │ │ └── intro.rst │ ├── index.rst │ ├── nlp │ │ ├── api.rst │ │ ├── intro.rst │ │ └── models.rst │ ├── nvidia_theme │ │ ├── __init__.py │ │ ├── breadcrumbs.html │ │ ├── footer.html │ │ ├── layout.html │ │ ├── layout_base.html │ │ ├── search.html │ │ ├── searchbox.html │ │ ├── static │ │ │ ├── css │ │ │ │ ├── badge_only.css │ │ │ │ ├── nvidia_theme.css │ │ │ │ └── theme.css │ │ │ ├── fonts │ │ │ │ ├── Lato │ │ │ │ │ ├── fonts.css │ │ │ │ │ ├── lato-bold.ttf │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-regular.ttf │ │ │ │ │ ├── lato-regular.woff │ │ │ │ │ └── lato-regular.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ ├── images │ │ │ │ ├── favicon.ico │ │ │ │ └── nvidia_logo.png │ │ │ └── js │ │ │ │ ├── modernizr.min.js │ │ │ │ └── theme.js │ │ ├── theme.conf │ │ └── versions.html │ └── tts │ │ ├── intro.rst │ │ ├── models.rst │ │ ├── squeezewave_wn.png │ │ └── waveglow.png └── update_docs.sh ├── examples ├── asr │ ├── conf │ │ ├── config.yaml │ │ ├── jasper_10x5dr.yaml │ │ ├── matchboxnet_3x1x64_v1.yaml │ │ ├── matchboxnet_3x1x64_v2.yaml │ │ ├── matchboxnet_3x1x64_vad.yaml │ │ ├── quartznet_15x5.yaml │ │ └── quartznet_15x5_zh.yaml │ ├── experimental │ │ └── configs │ │ │ ├── config_bpe.yaml │ │ │ ├── contextnet_bpe │ │ │ ├── 
contextnet_192_2x_stride.yaml │ │ │ ├── contextnet_192_4x_stride.yaml │ │ │ └── contextnet_192_8x_stride.yaml │ │ │ └── quartznet_15x5_aug.yaml │ ├── speech_to_label.py │ ├── speech_to_text.py │ ├── speech_to_text_bpe.py │ └── speech_to_text_infer.py ├── cv │ └── mnist_lenet5_image_classification_pure_lightning.py ├── nlp │ ├── glue_benchmark │ │ ├── glue_benchmark.py │ │ └── glue_benchmark_config.yaml │ ├── intent_slot_classification │ │ ├── conf │ │ │ └── intent_slot_classification_config.yaml │ │ ├── data │ │ │ ├── assistant_utils.py │ │ │ └── import_datasets.py │ │ └── intent_slot_classification.py │ ├── language_modeling │ │ ├── bert_pretraining.py │ │ ├── conf │ │ │ ├── bert_pretraining_from_preprocessed_config.yaml │ │ │ ├── bert_pretraining_from_text_config.yaml │ │ │ └── transformer_lm_config.yaml │ │ ├── convert_weights_to_nemo1.0.py │ │ ├── get_wkt2.sh │ │ └── transformer_lm.py │ ├── question_answering │ │ ├── conf │ │ │ └── question_answering_squad_config.yaml │ │ ├── get_squad.py │ │ └── question_answering_squad.py │ ├── text_classification │ │ ├── conf │ │ │ └── text_classification_config.yaml │ │ ├── data │ │ │ └── import_datasets.py │ │ └── text_classification_with_bert.py │ └── token_classification │ │ ├── conf │ │ ├── punctuation_capitalization_config.yaml │ │ └── token_classification_config.yaml │ │ ├── data │ │ ├── get_tatoeba_data.py │ │ └── import_from_iob_format.py │ │ ├── punctuation_capitalization.py │ │ └── token_classification.py ├── speaker_recognition │ ├── conf │ │ ├── SpeakerNet_recognition_3x2x512.yaml │ │ └── SpeakerNet_verification_3x2x512.yaml │ ├── speaker_reco.py │ ├── spkr_get_emb.py │ └── voxceleb_eval.py └── tts │ ├── conf │ ├── glow_tts.yaml │ ├── squeezewave.yaml │ ├── tacotron2.yaml │ └── waveglow.yaml │ ├── glow_tts.py │ ├── squeezewave.py │ ├── tacotron2.py │ ├── test_tts_infer.py │ └── waveglow.py ├── external ├── get_collections.py └── get_modules.py ├── nemo ├── README.md ├── __init__.py ├── collections │ ├── 
__init__.py │ ├── asr │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── audio_to_label.py │ │ │ └── audio_to_text.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── angularloss.py │ │ │ └── ctc.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── wer.py │ │ │ └── wer_bpe.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── asr_model.py │ │ │ ├── classification_models.py │ │ │ ├── ctc_bpe_models.py │ │ │ ├── ctc_models.py │ │ │ └── label_models.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── audio_preprocessing.py │ │ │ └── conv_asr.py │ │ └── parts │ │ │ ├── __init__.py │ │ │ ├── cleaners.py │ │ │ ├── collections.py │ │ │ ├── features.py │ │ │ ├── jasper.py │ │ │ ├── manifest.py │ │ │ ├── parsers.py │ │ │ ├── perturb.py │ │ │ ├── segment.py │ │ │ └── spectr_augment.py │ ├── common │ │ ├── __init__.py │ │ ├── callbacks │ │ │ ├── __init__.py │ │ │ └── callbacks.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── aggregator.py │ │ │ ├── cross_entropy.py │ │ │ ├── mse_loss.py │ │ │ ├── smoothed_cross_entropy.py │ │ │ └── spanning_loss.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ └── classification_accuracy.py │ │ ├── parts │ │ │ ├── __init__.py │ │ │ ├── multi_layer_perceptron.py │ │ │ ├── transformer_utils.py │ │ │ └── utils.py │ │ └── tokenizers │ │ │ ├── __init__.py │ │ │ ├── char_tokenizer.py │ │ │ ├── huggingface │ │ │ ├── __init__.py │ │ │ └── auto_tokenizer.py │ │ │ ├── sentencepiece_tokenizer.py │ │ │ ├── tokenizer_spec.py │ │ │ └── word_tokenizer.py │ ├── cv │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── mnist_dataset.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ └── nll_loss.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── mnist_lenet5.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ └── lenet5.py │ ├── nlp │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_utils │ │ │ │ ├── __init__.py │ │ │ │ └── data_preprocessing.py │ │ │ ├── glue_benchmark │ │ │ │ ├── __init__.py │ │ │ │ ├── data_processors.py │ │ │ │ 
└── glue_benchmark_dataset.py │ │ │ ├── intent_slot_classification │ │ │ │ ├── __init__.py │ │ │ │ ├── intent_slot_classification_dataset.py │ │ │ │ └── intent_slot_classification_descriptor.py │ │ │ ├── language_modeling │ │ │ │ ├── __init__.py │ │ │ │ ├── l2r_lm_dataset.py │ │ │ │ └── lm_bert_dataset.py │ │ │ ├── question_answering_squad │ │ │ │ ├── __init__.py │ │ │ │ ├── qa_dataset.py │ │ │ │ └── qa_squad_processing.py │ │ │ ├── text_classification │ │ │ │ ├── __init__.py │ │ │ │ └── text_classification_dataset.py │ │ │ └── token_classification │ │ │ │ ├── __init__.py │ │ │ │ ├── punctuation_capitalization_dataset.py │ │ │ │ ├── token_classification_dataset.py │ │ │ │ └── token_classification_descriptor.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── classification_report.py │ │ │ └── perplexity.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── glue_benchmark │ │ │ │ ├── __init__.py │ │ │ │ ├── glue_benchmark_model.py │ │ │ │ └── metrics_for_glue.py │ │ │ ├── intent_slot_classification │ │ │ │ ├── __init__.py │ │ │ │ └── intent_slot_classification_model.py │ │ │ ├── language_modeling │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_lm_model.py │ │ │ │ └── transformer_lm_model.py │ │ │ ├── question_answering │ │ │ │ ├── __init__.py │ │ │ │ └── qa_model.py │ │ │ ├── text_classification │ │ │ │ ├── __init__.py │ │ │ │ └── text_classification_model.py │ │ │ └── token_classification │ │ │ │ ├── __init__.py │ │ │ │ ├── punctuation_capitalization_model.py │ │ │ │ └── token_classification_model.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── common │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_module.py │ │ │ │ ├── classifier.py │ │ │ │ ├── huggingface │ │ │ │ ├── __init__.py │ │ │ │ ├── albert.py │ │ │ │ ├── auto.py │ │ │ │ ├── bert.py │ │ │ │ ├── distilbert.py │ │ │ │ ├── huggingface_utils.py │ │ │ │ └── roberta.py │ │ │ │ ├── lm_utils.py │ │ │ │ ├── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── megatron_bert.py │ │ │ │ └── megatron_utils.py │ │ │ │ ├── 
sequence_classifier.py │ │ │ │ ├── sequence_regression.py │ │ │ │ ├── sequence_token_classifier.py │ │ │ │ ├── token_classifier.py │ │ │ │ ├── tokenizer_utils.py │ │ │ │ └── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── transformer_decoders.py │ │ │ │ ├── transformer_encoders.py │ │ │ │ ├── transformer_generators.py │ │ │ │ └── transformer_modules.py │ │ └── parts │ │ │ ├── __init__.py │ │ │ └── utils_funcs.py │ └── tts │ │ ├── __init__.py │ │ ├── data │ │ ├── __init__.py │ │ └── datalayers.py │ │ ├── helpers │ │ ├── __init__.py │ │ └── helpers.py │ │ ├── losses │ │ ├── __init__.py │ │ ├── glow_tts_loss.py │ │ ├── tacotron2loss.py │ │ └── waveglowloss.py │ │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── glow_tts.py │ │ ├── squeezewave.py │ │ ├── tacotron2.py │ │ └── waveglow.py │ │ └── modules │ │ ├── __init__.py │ │ ├── denoiser.py │ │ ├── glow_tts.py │ │ ├── glow_tts_parser.py │ │ ├── glow_tts_submodules.py │ │ ├── squeezewave.py │ │ ├── squeezewave_submodules.py │ │ ├── submodules.py │ │ ├── tacotron2.py │ │ └── waveglow.py ├── constants.py ├── core │ ├── __init__.py │ ├── classes │ │ ├── __init__.py │ │ ├── common.py │ │ ├── dataset.py │ │ ├── exportable.py │ │ ├── loss.py │ │ ├── modelPT.py │ │ └── module.py │ ├── config │ │ ├── __init__.py │ │ ├── base_config.py │ │ ├── optimizers.py │ │ ├── pytorch.py │ │ ├── pytorch_lightning.py │ │ ├── schedulers.py │ │ └── set_config.py │ ├── neural_types │ │ ├── __init__.py │ │ ├── axes.py │ │ ├── comparison.py │ │ ├── elements.py │ │ └── neural_type.py │ └── optim │ │ ├── __init__.py │ │ ├── lr_scheduler.py │ │ ├── novograd.py │ │ └── optimizers.py ├── package_info.py └── utils │ ├── __init__.py │ ├── arguments.py │ ├── cloud.py │ ├── decorators │ ├── __init__.py │ ├── deprecated.py │ ├── experimental.py │ └── port_docs.py │ ├── env_var_parsing.py │ ├── exceptions.py │ ├── exp_manager.py │ ├── export_utils.py │ ├── formatters │ ├── __init__.py │ ├── base.py │ ├── colors.py │ └── utils.py │ ├── get_rank.py │ 
├── lightning_logger_patch.py │ ├── metaclasses.py │ ├── model_utils.py │ └── nemo_logging.py ├── reinstall.sh ├── requirements ├── requirements.txt ├── requirements_asr.txt ├── requirements_cv.txt ├── requirements_docs.txt ├── requirements_nlp.txt ├── requirements_simple_gan.txt ├── requirements_test.txt └── requirements_tts.txt ├── scripts ├── asr_checkpoint_port.py ├── convasr_to_onnx.py ├── convert_to_tarred_audio_dataset.py ├── freesound_download_resample │ ├── download_resample_freesound.sh │ ├── freesound_download.py │ ├── freesound_requirements.txt │ └── freesound_resample.py ├── get_librispeech_data.py ├── get_openslr_rir.py ├── process_an4_data.py ├── process_asr_text_tokenizer.py ├── process_speech_commands_data.py ├── process_vad_data.py └── scp_to_manifest.py ├── setup.cfg ├── setup.py ├── tests ├── collections │ ├── asr │ │ ├── test_asr_classification_model.py │ │ ├── test_asr_ctc_encoder_model_bpe.py │ │ ├── test_asr_ctcencdec_model.py │ │ ├── test_asr_datasets.py │ │ ├── test_asr_exportables.py │ │ ├── test_asr_metrics.py │ │ ├── test_asr_modules.py │ │ └── test_speaker_label_models.py │ ├── common │ │ ├── test_metrics.py │ │ └── test_spc_tokenizer.py │ ├── nlp │ │ ├── test_classification_report.py │ │ ├── test_huggingface.py │ │ ├── test_megatron.py │ │ └── test_nlp_exportables.py │ └── tts │ │ └── test_waveglow.py ├── conftest.py ├── core │ ├── test_exp_manager.py │ ├── test_fileio.py │ ├── test_neural_types.py │ ├── test_optimizers_schedulers.py │ ├── test_serialization.py │ └── test_typecheck.py ├── manualtest_model_downloads.py └── test_data_dir.py ├── tools └── speech_data_explorer │ ├── README.md │ ├── data_explorer.py │ ├── requirements.txt │ └── screenshot.png └── tutorials ├── 00_NeMo_Primer.ipynb ├── 01_NeMo_Models.ipynb ├── NeMo_voice_swap_app.ipynb ├── asr ├── 01_ASR_with_NeMo.ipynb ├── 02_Online_ASR_Microphone_Demo.ipynb ├── 03_Speech_Commands.ipynb ├── 05_Online_Noise_Augmentation.ipynb ├── 06_Voice_Activiy_Detection.ipynb └── 
07_Online_Offline_Microphone_VAD_Demo.ipynb ├── nlp ├── 01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb ├── 02_NLP_Tokenizers.ipynb ├── GLUE_Benchmark.ipynb ├── Intent_and_Slot_Classification.ipynb ├── Punctuation_and_Capitalization.ipynb ├── Question_Answering_Squad.ipynb ├── Relation_Extraction-BioMegatron.ipynb ├── Text_Classification_Sentiment_Analysis.ipynb ├── Token_Classification-BioMegatron.ipynb └── Token_Classification_Named_Entity_Recognition.ipynb ├── speaker_recognition └── Speaker_Recognition_Verification.ipynb └── tts └── 1_TTS_inference.ipynb /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | .Python 6 | env 7 | pip-log.txt 8 | pip-delete-this-directory.txt 9 | .tox 10 | .coverage 11 | .coverage.* 12 | .cache 13 | nosetests.xml 14 | coverage.xml 15 | *,cover 16 | *.log 17 | .git 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | **Steps/Code to reproduce bug** 15 | 16 | Please list *minimal* steps or code snippet for us to be able to reproduce the bug. 17 | 18 | A helpful guide on how to craft a minimal bug report http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports. 19 | 20 | 21 | **Expected behavior** 22 | 23 | A clear and concise description of what you expected to happen. 24 | 25 | **Environment overview (please complete the following information)** 26 | 27 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)] 28 | - Method of NeMo install: [pip install or from source]. 
Please specify exact commands you used to install. 29 | - If method of install is [Docker], provide `docker pull` & `docker run` commands used 30 | 31 | **Environment details** 32 | 33 | If NVIDIA docker image is used you don't need to specify these. 34 | Otherwise, please provide: 35 | - OS version 36 | - PyTorch version 37 | - Python version 38 | 39 | **Additional context** 40 | 41 | Add any other context about the problem here. 42 | Example: GPU model 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: okuchaiev 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 13 | 14 | **Describe the solution you'd like** 15 | 16 | A clear and concise description of what you want to happen. 17 | Provide a code snippet on how new APIs/changes would be used by others. 18 | 19 | **Describe alternatives you've considered** 20 | 21 | A clear and concise description of any alternative solutions or features you've considered. 22 | 23 | **Additional context** 24 | 25 | Add any other context or screenshots about the feature request here. 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Post a question about using NeMo 4 | title: "[Question]" 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe your question** 11 | 12 | A clear and concise description of your question. 13 | Describe what you want to achieve. And/or what NeMo APIs are unclear/confusing. 
14 | 15 | 16 | **Environment overview (please complete the following information)** 17 | 18 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)] 19 | - Method of NeMo install: [pip install or from source]. Please specify exact commands you used to install. 20 | - If method of install is [Docker], provide `docker pull` & `docker run` commands used 21 | 22 | **Environment details** 23 | 24 | If NVIDIA docker image is used you don't need to specify these. 25 | Otherwise, please provide: 26 | - OS version 27 | - PyTorch version 28 | - Python version 29 | 30 | **Additional context** 31 | 32 | Add any other context about the problem here. 33 | Example: GPU model 34 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2020 NVIDIA. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================= 16 | 17 | # Read the Docs configuration file 18 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 19 | 20 | # Required field. 21 | version: 2 22 | 23 | # Build documentation in the docs/ directory with Sphinx. 
24 | sphinx: 25 | configuration: docs/source/conf.py 26 | 27 | # Set the version of Python and requirements required to build your docs 28 | python: 29 | version: 3.7 30 | install: 31 | - requirements: requirements/requirements_docs.txt 32 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:experimental 2 | 3 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:20.08-py3 18 | 19 | 20 | # build an image that includes only the nemo dependencies, ensures that dependencies 21 | # are included first for optimal caching, and useful for building a development 22 | # image (by specifying build target as `nemo-deps`) 23 | FROM ${BASE_IMAGE} as nemo-deps 24 | 25 | # Ensure apt-get won't prompt for selecting options 26 | ENV DEBIAN_FRONTEND=noninteractive 27 | RUN apt-get update && \ 28 | apt-get install -y \ 29 | libsndfile1 sox \ 30 | python-setuptools \ 31 | python-dev ffmpeg && \ 32 | rm -rf /var/lib/apt/lists/* 33 | 34 | # build torchaudio (change latest release version to match pytorch) 35 | WORKDIR /tmp/torchaudio_build 36 | RUN git clone --depth 1 --branch release/0.6 https://github.com/pytorch/audio.git && \ 37 | cd audio && \ 38 | BUILD_SOX=1 python setup.py install && \ 39 | cd .. && rm -r audio 40 | 41 | # install nemo dependencies 42 | WORKDIR /tmp/nemo 43 | COPY requirements . 44 | RUN for f in $(ls requirements/*.txt); do pip install --disable-pip-version-check --no-cache-dir -r $f; done 45 | 46 | # copy nemo source into a scratch image 47 | FROM scratch as nemo-src 48 | COPY . . 49 | 50 | # start building the final container 51 | FROM nemo-deps as nemo 52 | ARG NEMO_VERSION=1.0.0b1 53 | 54 | # Check that NEMO_VERSION is set. Build will fail without this. 
Expose NEMO and base container 55 | # version information as runtime environment variable for introspection purposes 56 | RUN /usr/bin/test -n "$NEMO_VERSION" && \ 57 | /bin/echo "export NEMO_VERSION=${NEMO_VERSION}" >> /root/.bashrc && \ 58 | /bin/echo "export BASE_IMAGE=${BASE_IMAGE}" >> /root/.bashrc 59 | RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]" 60 | 61 | # copy scripts/examples/tests into container for end user 62 | WORKDIR /workspace/nemo 63 | COPY scripts /workspace/nemo/scripts 64 | COPY examples /workspace/nemo/examples 65 | COPY tests /workspace/nemo/tests 66 | # COPY README.rst LICENSE /workspace/nemo/ 67 | 68 | RUN printf "#!/bin/bash\njupyter lab --no-browser --allow-root --ip=0.0.0.0" >> start-jupyter.sh && \ 69 | chmod +x start-jupyter.sh 70 | 71 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/.nojekyll -------------------------------------------------------------------------------- /docs/source/api-docs/nemo.rst: -------------------------------------------------------------------------------- 1 | NeMo Core API 2 | ============= 3 | 4 | Classes and Interfaces 5 | ---------------------- 6 | 7 | .. autoclass:: nemo.core.ModelPT 8 | :show-inheritance: 9 | :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 10 | 11 | 12 | Neural Types 13 | ------------ 14 | 15 | .. 
automodule:: nemo.core.neural_types.neural_type 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/asr/api.rst: -------------------------------------------------------------------------------- 1 | NeMo ASR collection API 2 | ======================= 3 | 4 | 5 | Model Classes 6 | ------------- 7 | 8 | .. autoclass:: nemo.collections.asr.models.EncDecCTCModel 9 | :show-inheritance: 10 | :members: transcribe, change_vocabulary, setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 11 | 12 | 13 | .. autoclass:: nemo.collections.asr.models.EncDecClassificationModel 14 | :show-inheritance: 15 | :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 16 | 17 | 18 | .. autoclass:: nemo.collections.asr.models.EncDecSpeakerLabelModel 19 | :show-inheritance: 20 | :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 21 | 22 | 23 | 24 | Modules 25 | ------- 26 | 27 | .. autoclass:: nemo.collections.asr.modules.ConvASREncoder 28 | :show-inheritance: 29 | :members: 30 | 31 | .. autoclass:: nemo.collections.asr.modules.ConvASRDecoder 32 | :show-inheritance: 33 | :members: -------------------------------------------------------------------------------- /docs/source/asr/ctc_asr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/ctc_asr.png -------------------------------------------------------------------------------- /docs/source/asr/intro.rst: -------------------------------------------------------------------------------- 1 | Automatic Speech Recognition (ASR) 2 | ================================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 8 6 | 7 | datasets 8 | models 9 | api 10 | 11 | 12 | Speech recognition tutorials can be found under ``/tutorials/asr/`` 13 | -------------------------------------------------------------------------------- /docs/source/asr/jasper_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/jasper_layers.png -------------------------------------------------------------------------------- /docs/source/asr/jasper_vertical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/jasper_vertical.png -------------------------------------------------------------------------------- /docs/source/asr/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | Currently, NeMo's ASR collection supports the following models: 5 | 6 | .. _Jasper_model: 7 | 8 | Jasper 9 | ------ 10 | 11 | Jasper ("Just Another SPeech Recognizer") :cite:`asr-models-li2019jasper` is a deep time delay neural network (TDNN) comprising blocks of 1D-convolutional layers. 12 | Jasper family of models are denoted as Jasper_[BxR] where B is the number of blocks, and R - the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D convolution, batch normalization, ReLU, and dropout: 13 | 14 | .. image:: jasper_vertical.png 15 | :align: center 16 | :alt: jasper model 17 | 18 | 19 | QuartzNet 20 | --------- 21 | 22 | QuartzNet :cite:`asr-models-kriman2019quartznet` is a version of Jasper :cite:`asr-models-li2019jasper` model with separable convolutions and larger filters. It can achieve performance 23 | similar to Jasper but with an order of magnitude fewer parameters. 
24 | Similarly to Jasper, QuartzNet family of models are denoted as QuartzNet_[BxR] where B is the number of blocks, and R - the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D *separable* convolution, batch normalization, ReLU, and dropout: 25 | 26 | .. image:: quartz_vertical.png 27 | :align: center 28 | :alt: quartznet model 29 | 30 | 31 | Jasper and QuartzNet models can be instantiated using :class:`EncDecCTCModel` class. 32 | 33 | 34 | 35 | References 36 | ---------- 37 | 38 | .. bibliography:: asr_all.bib 39 | :style: plain 40 | :labelprefix: ASR-MODELS 41 | :keyprefix: asr-models- 42 | -------------------------------------------------------------------------------- /docs/source/asr/quartz_vertical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/asr/quartz_vertical.png -------------------------------------------------------------------------------- /docs/source/common/intro.rst: -------------------------------------------------------------------------------- 1 | Common Collection 2 | ================= 3 | 4 | The common collection contains things that could be used across all collections. 5 | 6 | Tokenizers 7 | ---------- 8 | .. automodule:: nemo.collections.common.tokenizers.AutoTokenizer 9 | :special-members: __init__ 10 | .. automodule:: nemo.collections.common.tokenizers.SentencePieceTokenizer 11 | :special-members: __init__ 12 | .. automodule:: nemo.collections.common.tokenizers.TokenizerSpec 13 | :special-members: __init__ 14 | 15 | 16 | Losses 17 | ------ 18 | .. automodule:: nemo.collections.common.losses.AggregatorLoss 19 | :special-members: __init__ 20 | 21 | .. automodule:: nemo.collections.common.losses.CrossEntropyLoss 22 | :special-members: __init__ 23 | 24 | .. automodule:: nemo.collections.common.losses.MSELoss 25 | :special-members: __init__ 26 | 27 | .. 
automodule:: nemo.collections.common.losses.SmoothedCrossEntropyLoss 28 | :special-members: __init__ 29 | .. automodule:: nemo.collections.common.losses.SpanningLoss 30 | :special-members: __init__ 31 | 32 | -------------------------------------------------------------------------------- /docs/source/core.rst: -------------------------------------------------------------------------------- 1 | Core Concepts 2 | ============= 3 | 4 | Neural Module 5 | ~~~~~~~~~~~~~ 6 | Neural Modules are building blocks for Models. 7 | They accept (typed) inputs and return (typed) outputs. *All Neural Modules inherit from ``torch.nn.Module`` and are, therefore, compatible with the PyTorch ecosystem.* There are 3 types of Neural Modules: 8 | 9 | * Regular modules 10 | * Dataset/IterableDataset 11 | * Losses 12 | 13 | Model 14 | ~~~~~ 15 | NeMo Model is an entity which contains 100% of information necessary to invoke training/fine-tuning. 16 | It is based on PyTorch Lightning's LightningModule and as such contains information on: 17 | 18 | * Neural Network architecture, including necessary pre- and post- processing 19 | * How data is handled for training/validation/testing 20 | * Optimization, learning rate schedules, scaling, etc. 21 | 22 | Neural Types 23 | ~~~~~~~~~~~~ 24 | 25 | Neural Types perform semantic checks for module and model inputs/outputs. They contain information about: 26 | 27 | * Semantics of what is stored in the tensors. For example, logits, logprobs, audiosignal, embeddings, etc. 28 | * Axes layout, semantic and (optionally) dimensionality. For example: [Batch, Time, Channel] -------------------------------------------------------------------------------- /docs/source/cv/intro.rst: -------------------------------------------------------------------------------- 1 | Computer Vision (CV) 2 | =============================== 3 | 4 | The collection contains several datasets, modules and losses useful in computer/machine vision tasks. 5 | 6 | Models 7 | ------ 8 | .. 
automodule:: nemo.collections.cv.models.mnist_lenet5 9 | 10 | Datasets 11 | ---------- 12 | .. automodule:: nemo.collections.cv.datasets.mnist_dataset 13 | :special-members: __init__, __len__, __getitem__ 14 | 15 | Neural Modules 16 | -------------- 17 | .. automodule:: nemo.collections.cv.modules.lenet5 18 | :special-members: __init__ 19 | 20 | Losses 21 | ------ 22 | .. automodule:: nemo.collections.cv.losses.nll_loss 23 | :special-members: __init__ 24 | 25 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | NVIDIA NeMo Developer Guide 2 | =========================== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 8 7 | 8 | Introduction 9 | core 10 | asr/intro 11 | cv/intro 12 | nlp/intro 13 | tts/intro 14 | common/intro 15 | api-docs/nemo 16 | 17 | 18 | NeMo is a library for easy training, building and manipulating of AI models. 19 | NeMo's current focus is providing great experience for Conversational AI. 20 | 21 | NeMo models can be trained on multi-GPU and multi-node, with or without Mixed Precision 22 | Many models in NeMo come with high-quality pre-trained checkpoints. 23 | 24 | Requirements 25 | ------------ 26 | 27 | NeMo's main requirements are: 28 | 29 | 1) Python 3.6 or 3.7 30 | 2) Pytorch 1.6 or above 31 | 32 | Installation 33 | ~~~~~~~~~~~~ 34 | ``pip install nemo_toolkit[all]==version`` 35 | 36 | We recommend using NVIDIA's PyTorch container 37 | 38 | .. 
code-block:: bash 39 | 40 | docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ 41 | -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ 42 | stack=67108864 nvcr.io/nvidia/pytorch:20.06-py3 43 | 44 | 45 | -------------------------------------------------------------------------------- /docs/source/nlp/api.rst: -------------------------------------------------------------------------------- 1 | NeMo NLP collection API 2 | ======================= 3 | 4 | 5 | Model Classes 6 | ------------- 7 | 8 | .. autoclass:: nemo.collections.nlp.models.GLUEModel 9 | :show-inheritance: 10 | :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 11 | 12 | .. autoclass:: nemo.collections.nlp.models.PunctuationCapitalizationModel 13 | :show-inheritance: 14 | :members: add_punctuation_capitalization, setup_training_data, setup_optimization, setup_validation_data, setup_test_data, multi_validation_epoch_end, register_artifact 15 | 16 | .. autoclass:: nemo.collections.nlp.models.TokenClassificationModel 17 | :show-inheritance: 18 | :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact 19 | 20 | 21 | Modules 22 | ------- 23 | 24 | .. autoclass:: nemo.collections.nlp.modules.BertModule 25 | :show-inheritance: 26 | :members: 27 | 28 | .. autoclass:: nemo.collections.nlp.modules.MegatronBertEncoder 29 | :show-inheritance: 30 | :members: 31 | 32 | .. autoclass:: nemo.collections.nlp.modules.AlbertEncoder 33 | :show-inheritance: 34 | :members: 35 | 36 | .. autoclass:: nemo.collections.nlp.modules.BertEncoder 37 | :show-inheritance: 38 | :members: 39 | 40 | .. autoclass:: nemo.collections.nlp.modules.DistilBertEncoder 41 | :show-inheritance: 42 | :members: 43 | 44 | .. autoclass:: nemo.collections.nlp.modules.RobertaEncoder 45 | :show-inheritance: 46 | :members: 47 | 48 | .. 
autoclass:: nemo.collections.nlp.modules.SequenceClassifier 49 | :show-inheritance: 50 | :members: 51 | 52 | .. autoclass:: nemo.collections.nlp.modules.SequenceRegression 53 | :show-inheritance: 54 | :members: 55 | 56 | .. autoclass:: nemo.collections.nlp.modules.SequenceTokenClassifier 57 | :show-inheritance: 58 | :members: 59 | 60 | .. autofunction:: nemo.collections.nlp.modules.get_pretrained_lm_model 61 | 62 | .. autofunction:: nemo.collections.nlp.modules.get_pretrained_lm_models_list -------------------------------------------------------------------------------- /docs/source/nlp/intro.rst: -------------------------------------------------------------------------------- 1 | Natural Language Processing (NLP) 2 | ================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 8 6 | 7 | models 8 | api 9 | 10 | 11 | NLP tutorials can be found under ``/tutorials/nlp/`` 12 | -------------------------------------------------------------------------------- /docs/source/nlp/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | NeMo's NLP collection supports the following models: 5 | 6 | * BERT pretraining 7 | * GLUE Benchmark 8 | * Joint Intent and Slot Classification 9 | * Text Classification 10 | * Named Entity Recognition (NER) 11 | * Punctuation and Capitalization 12 | * Question Answering 13 | 14 | Scripts for running these models can be found under ``NeMo/examples/nlp/``. 15 | NLP tutorials are located under ``NeMo/tutorials/nlp/``. -------------------------------------------------------------------------------- /docs/source/nvidia_theme/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Copyright (c) 2011-2019 Ryan Roemer 16 | # 17 | # Permission is hereby granted, free of charge, to any person obtaining a copy 18 | # of this software and associated documentation files (the "Software"), to deal 19 | # in the Software without restriction, including without limitation the rights 20 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 21 | # copies of the Software, and to permit persons to whom the Software is 22 | # furnished to do so, subject to the following conditions: 23 | # 24 | # The above copyright notice and this permission notice shall be included in 25 | # all copies or substantial portions of the Software. 26 | # 27 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 33 | # THE SOFTWARE. 34 | 35 | """ 36 | NVIDIA developer guide theme. 37 | 38 | Based on https://github.com/ryan-roemer/sphinx-bootstrap-theme. 
39 | """ 40 | from os import path 41 | 42 | __version__ = '0.1.0a1' 43 | __version_full__ = __version__ 44 | 45 | 46 | def get_html_theme_path(): 47 | """Return the absolute path of the directory that contains this theme package.""" 48 | cur_dir = path.abspath(path.dirname(path.dirname(__file__))) 49 | return cur_dir 50 | 51 | 52 | # See http://www.sphinx-doc.org/en/stable/theming.html#distribute-your-theme-as-a-python-package 53 | def setup(app): 54 | app.add_html_theme('sphinx_nvidia_theme', path.abspath(path.dirname(__file__))) 55 | -------------------------------------------------------------------------------- /docs/source/nvidia_theme/footer.html: -------------------------------------------------------------------------------- 1 |
2 | {% if (theme_prev_next_buttons_location == 'bottom' or theme_prev_next_buttons_location == 'both') and (next or prev) %} 3 | 11 | {% endif %} 12 | 13 |
14 | 15 |
16 |

17 | {%- if show_copyright %} 18 | {%- if hasdoc('copyright') %} 19 | {% trans path=pathto('copyright'), copyright=copyright|e %}© Copyright {{ copyright }}{% endtrans %} 20 | {%- else %} 21 | {% trans copyright=copyright|e %}© Copyright {{ copyright }}{% endtrans %} 22 | {%- endif %} 23 | {%- endif %} 24 | 25 | {%- if build_id and build_url %} 26 | {% trans build_url=build_url, build_id=build_id %} 27 | 28 | Build 29 | {{ build_id }}. 30 | 31 | {% endtrans %} 32 | {%- elif commit %} 33 | {% trans commit=commit %} 34 | 35 | Revision {{ commit }}. 36 | 37 | {% endtrans %} 38 | {%- elif last_updated %} 39 | 40 | {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %} 41 | 42 | {%- endif %} 43 | 44 |

45 |
46 | 47 | {%- if show_sphinx %} 48 | {% trans %}Built with Sphinx using a theme provided by Read the Docs{% endtrans %}. 49 | {%- endif %} 50 | 51 | {%- block extrafooter %} {% endblock %} 52 | 53 |
54 | 55 | -------------------------------------------------------------------------------- /docs/source/nvidia_theme/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout_base.html" %} 2 | {% block extrahead %} 3 | 4 | {# PROJECT NAME #} 5 | {% if theme_project_name %} 6 | 11 | {% endif %} 12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /docs/source/nvidia_theme/search.html: -------------------------------------------------------------------------------- 1 | {# 2 | basic/search.html 3 | ~~~~~~~~~~~~~~~~~ 4 | 5 | Template for the search page. 6 | 7 | :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. 8 | :license: BSD, see LICENSE for details. 9 | #} 10 | {%- extends "layout.html" %} 11 | {% set title = _('Search') %} 12 | {%- block scripts %} 13 | {{ super() }} 14 | 15 | {%- endblock %} 16 | {% block footer %} 17 | 20 | {# this is used when loading the search index using $.ajax fails, 21 | such as on Chrome for documents on localhost #} 22 | 23 | {{ super() }} 24 | {% endblock %} 25 | {% block body %} 26 | 34 | 35 | {% if search_performed %} 36 | {# Translators: Search is a noun, not a verb #} 37 |

{{ _('Search Results') }}

38 | {% if not search_results %} 39 |

{{ _('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.') }}

40 | {% endif %} 41 | {% endif %} 42 |
43 | {% if search_results %} 44 |
    45 | {% for href, caption, context in search_results %} 46 |
  • 47 | {{ caption }} 48 |

    {{ context|e }}

    49 |
  • 50 | {% endfor %} 51 |
52 | {% endif %} 53 |
54 | {% endblock %} 55 | -------------------------------------------------------------------------------- /docs/source/nvidia_theme/searchbox.html: -------------------------------------------------------------------------------- 1 | {%- if builder != 'singlehtml' %} 2 |
3 |
4 | 5 | 6 | 7 |
8 |
9 | {%- endif %} 10 | -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/fonts.css: -------------------------------------------------------------------------------- 1 | /* Fonts */ 2 | 3 | @font-face { 4 | font-family: 'lato'; 5 | src: url('lato-regular.woff2?#iefix') format('woff2'), 6 | url('lato-regular.woff') format('woff'), 7 | url('lato-regular.ttf') format('truetype'); 8 | font-weight: normal; 9 | font-style: normal; 10 | } -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/images/favicon.ico -------------------------------------------------------------------------------- /docs/source/nvidia_theme/static/images/nvidia_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/nvidia_theme/static/images/nvidia_logo.png -------------------------------------------------------------------------------- /docs/source/nvidia_theme/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = css/nvidia_theme.css 4 | pygments_style = default 5 | 6 | [options] 7 | analytics_id = 8 | canonical_url = 9 | collapse_navigation = True 10 | display_version = True 11 | includehidden = True 12 | logo_only = 13 | logo_path = 14 | navigation_depth = 4 15 | prev_next_buttons_location = bottom 16 | project_name = 17 | project_version = 18 | sticky_navigation = True 19 | style_external_links = False 20 | titles_only = 21 | 
-------------------------------------------------------------------------------- /docs/source/nvidia_theme/versions.html: -------------------------------------------------------------------------------- 1 | {% if READTHEDOCS %} 2 | {# Add rst-badge after rst-versions for small badge style. #} 3 |
4 | 5 | Read the Docs 6 | v: {{ current_version }} 7 | 8 | 9 |
10 |
11 |
{{ _('Versions') }}
12 | {% for slug, url in versions %} 13 |
{{ slug }}
14 | {% endfor %} 15 |
16 |
17 |
{{ _('Downloads') }}
18 | {% for type, url in downloads %} 19 |
{{ type }}
20 | {% endfor %} 21 |
22 |
23 |
{{ _('On Read the Docs') }}
24 |
25 | {{ _('Project Home') }} 26 |
27 |
28 | {{ _('Builds') }} 29 |
30 |
31 |
32 | {% trans %}Free document hosting provided by Read the Docs.{% endtrans %} 33 | 34 |
35 |
36 | {% endif %} 37 | 38 | -------------------------------------------------------------------------------- /docs/source/tts/intro.rst: -------------------------------------------------------------------------------- 1 | Speech Synthesis 2 | ================ -------------------------------------------------------------------------------- /docs/source/tts/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | Currently, NeMo's TTS collection supports the following models: 5 | 6 | .. _WaveGlow_Model: 7 | 8 | WaveGlow 9 | -------- 10 | 11 | WaveGlow :cite:`tts-models-prenger2018waveglow` is a Flow-based generative model that generates audio from mel spectrograms. 12 | Comprised of several flow steps, WaveGlow learns an invertible mapping from a simple latent space to audio waveforms. 13 | 14 | .. image:: waveglow.png 15 | :align: center 16 | :alt: waveglow model 17 | 18 | WaveGlow can be instantiated using the :class:`WaveGlowModel` class. 19 | 20 | 21 | SqueezeWave 22 | ----------- 23 | 24 | SqueezeWave :cite:`tts-models-zhai2020squeezewave` is a version of WaveGlow :cite:`tts-models-prenger2018waveglow` that simplifies the architecture of the WaveNet (WN) module by introducing depthwise separable convolutions and removing dual channels. 25 | SqueezeWave also uses larger group sizes, which reduces computation along the temporal axis and allows for fewer upsampling layers for the mel spectrogram. 26 | 27 | .. image:: squeezewave_wn.png 28 | :align: center 29 | :alt: squeezewave vs waveglow wavenet modules 30 | 31 | SqueezeWave can be instantiated using the :class:`SqueezeWaveModel` class. 32 | 33 | 34 | References 35 | ---------- 36 | 37 | .. 
bibliography:: tts_all.bib 38 | :style: plain 39 | :labelprefix: TTS-MODELS 40 | :keyprefix: tts-models- 41 | -------------------------------------------------------------------------------- /docs/source/tts/squeezewave_wn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/tts/squeezewave_wn.png -------------------------------------------------------------------------------- /docs/source/tts/waveglow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/docs/source/tts/waveglow.png -------------------------------------------------------------------------------- /docs/update_docs.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | make clean 3 | make html 4 | -------------------------------------------------------------------------------- /examples/cv/mnist_lenet5_image_classification_pure_lightning.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from dataclasses import dataclass 16 | 17 | import pytorch_lightning as ptl 18 | from omegaconf import DictConfig 19 | 20 | from nemo.collections.cv.models import MNISTLeNet5, MNISTLeNet5Config 21 | from nemo.core.config import Config, TrainerConfig, set_config 22 | from nemo.utils import logging 23 | 24 | 25 | @dataclass 26 | class AppConfig(Config): 27 | """ 28 | This is structured config for this application. 29 | 30 | Args: 31 | name: Description of the application. 32 | trainer: configuration of the trainer. 33 | model: configuration of the model. 34 | """ 35 | 36 | name: str = "Training of a LeNet-5 Model using a pure PyTorchLightning approach - using DDP on 2 GPUs." 37 | trainer: TrainerConfig = TrainerConfig(gpus=2, distributed_backend="dp") 38 | model: MNISTLeNet5Config = MNISTLeNet5Config() 39 | 40 | 41 | @set_config(config=AppConfig) 42 | def main(cfg: DictConfig): 43 | # Show configuration - user can modify every parameter from command line! 44 | logging.info("Application config\n" + cfg.pretty()) 45 | 46 | # The "model" - with dataloader/dataset inside of it. 47 | lenet5 = MNISTLeNet5(cfg.model) 48 | 49 | # Setup train data loader 50 | lenet5.setup_training_data() 51 | 52 | # Setup optimizer and scheduler 53 | lenet5.setup_optimization() 54 | 55 | # Create trainer. 56 | trainer = ptl.Trainer(**(cfg.trainer)) 57 | 58 | # Train. 59 | trainer.fit(model=lenet5) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() # TODO: No cfg in function call, and no hydra runner 64 | -------------------------------------------------------------------------------- /examples/nlp/glue_benchmark/glue_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | ## Tasks 17 | This script works with all GLUE Benchmark tasks, more details about the GLUE Benchmark could be found at 18 | https://gluebenchmark.com/ 19 | 20 | More details on how to use this script could be found in tutorials/nlp/GLUE_Benchmark.ipynb 21 | 22 | ## Model Training 23 | 24 | To train GLUEModel with the default config file, run: 25 | python glue_benchmark.py \ 26 | model.dataset.data_dir= \ 27 | model.task_name=TASK_NAME \ 28 | trainer.max_epochs= \ 29 | trainer.gpus="[] 30 | 31 | Supported task names: 32 | ["cola", "sst-2", "mrpc", "sts-b", "qqp", "mnli", "qnli", "rte", "wnli"] 33 | Note, MNLI task includes both matched and mismatched dev sets 34 | """ 35 | 36 | import pytorch_lightning as pl 37 | from omegaconf import DictConfig 38 | 39 | from nemo.collections.nlp.models import GLUEModel 40 | from nemo.core.config import hydra_runner 41 | from nemo.utils import logging 42 | from nemo.utils.exp_manager import exp_manager 43 | 44 | 45 | @hydra_runner(config_name="glue_benchmark_config") 46 | def main(cfg: DictConfig) -> None: 47 | logging.info(f'Config: {cfg.pretty()}') 48 | trainer = pl.Trainer(**cfg.trainer) 49 | exp_manager_cfg = cfg.get("exp_manager", None) 50 | if exp_manager_cfg: 51 | exp_manager_cfg.name = cfg.model.task_name 52 | logging.info(f'Setting task_name to {exp_manager_cfg.name} in exp_manager') 53 | exp_manager(trainer, exp_manager_cfg) 54 | model = GLUEModel(cfg.model, trainer=trainer) 55 | trainer.fit(model) 56 | if cfg.model.nemo_path: 57 | 
model.save_to(cfg.model.nemo_path) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /examples/nlp/intent_slot_classification/intent_slot_classification.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import pytorch_lightning as pl 16 | from omegaconf import DictConfig, OmegaConf 17 | 18 | from nemo.collections.nlp.models import IntentSlotClassificationModel 19 | from nemo.core.config import hydra_runner 20 | from nemo.utils import logging 21 | from nemo.utils.exp_manager import exp_manager 22 | 23 | 24 | @hydra_runner(config_path="conf", config_name="intent_slot_classification_config") 25 | def main(cfg: DictConfig) -> None: 26 | logging.info(f'Config Params:\n {OmegaConf.to_yaml(cfg)}') 27 | trainer = pl.Trainer(**cfg.trainer) 28 | exp_manager(trainer, cfg.get("exp_manager", None)) 29 | 30 | model = IntentSlotClassificationModel(cfg.model, trainer=trainer) 31 | trainer.fit(model) 32 | 33 | if cfg.model.nemo_path: 34 | model.save_to(cfg.model.nemo_path) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /examples/nlp/language_modeling/bert_pretraining.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import pytorch_lightning as pl 17 | from omegaconf import DictConfig 18 | 19 | from nemo.collections.nlp.models.language_modeling import BERTLMModel 20 | from nemo.core.config import hydra_runner 21 | from nemo.utils import logging 22 | from nemo.utils.exp_manager import exp_manager 23 | 24 | 25 | @hydra_runner(config_path="conf", config_name="bert_pretraining_from_text_config") 26 | def main(cfg: DictConfig) -> None: 27 | logging.info(f'Config:\n {cfg.pretty()}') 28 | trainer = pl.Trainer(**cfg.trainer) 29 | exp_manager(trainer, cfg.get("exp_manager", None)) 30 | bert_model = BERTLMModel(cfg.model, trainer=trainer) 31 | trainer.fit(bert_model) 32 | if cfg.model.nemo_path: 33 | bert_model.save_to(cfg.model.nemo_path) 34 | 35 | 36 | if __name__ == '__main__': 37 | main() 38 | -------------------------------------------------------------------------------- /examples/nlp/language_modeling/conf/bert_pretraining_from_preprocessed_config.yaml: -------------------------------------------------------------------------------- 1 | # BERT Pretraining from Preprocessed (tokenized) data 2 | name: &name PretrainingBERTFromPreprocessed 3 | trainer: 4 | gpus: 8 # the number of gpus, 0 for CPU, or list with gpu indices 5 | num_nodes: 1 6 | 
max_steps: 2285714 # precedence over max_epochs 7 | num_sanity_val_steps: 0 # needed for bert pretraining from preproc 8 | replace_sampler_ddp: false # needed for bert pretraining from preproc 9 | accumulate_grad_batches: 1 # accumulates grads every k batches 10 | precision: 16 # 16 to use AMP 11 | amp_level: O1 # O1 or O2 if using AMP 12 | distributed_backend: ddp 13 | gradient_clip_val: 1.0 14 | row_log_interval: 1 15 | val_check_interval: 1.0 # check once per epoch .25 for 4 times per epoch 16 | checkpoint_callback: false # provided by exp_manager 17 | logger: false # provided by exp_manager 18 | 19 | model: 20 | nemo_path: null # exported .nemo path 21 | only_mlm_loss: true # only use masked language model without next sentence prediction 22 | num_tok_classification_layers: 1 # number of token classification head output layers 23 | num_seq_classification_layers: 2 # number of sequence classification head output layers 24 | 25 | 26 | language_model: 27 | pretrained_model_name: bert-base-uncased # huggingface model name 28 | lm_checkpoint: null 29 | config: 30 | attention_probs_dropout_prob: 0.1 31 | hidden_act: gelu 32 | hidden_dropout_prob: 0.1 33 | hidden_size: 768 34 | initializer_range: 0.02 35 | intermediate_size: 3072 36 | max_position_embeddings: 512 37 | num_attention_heads: 12 38 | num_hidden_layers: 12 39 | type_vocab_size: 2 40 | vocab_size: 30522 41 | config_file: null # json file, precedence over config 42 | 43 | tokenizer: null 44 | 45 | train_ds: 46 | data_file: null # path to hdf5 file (or directory) 47 | max_predictions_per_seq: 80 48 | batch_size: 16 49 | shuffle: true 50 | num_samples: -1 51 | num_workers: 2 52 | drop_last: false 53 | pin_memory: false 54 | 55 | optim: 56 | name: adamw 57 | lr: 0.4375e-4 58 | weight_decay: 0.01 59 | 60 | sched: 61 | name: SquareRootAnnealing 62 | warmup_steps: null 63 | warmup_ratio: 0.01 64 | min_lr: 0.0 65 | last_epoch: -1 66 | 67 | 68 | exp_manager: 69 | exp_dir: null # where to store logs and checkpoints 
70 | name: *name # name of experiment 71 | create_tensorboard_logger: True 72 | create_checkpoint_callback: True 73 | 74 | 75 | hydra: 76 | run: 77 | dir: . 78 | job_logging: 79 | root: 80 | handlers: null -------------------------------------------------------------------------------- /examples/nlp/language_modeling/conf/transformer_lm_config.yaml: -------------------------------------------------------------------------------- 1 | # Config file for training left-to-right Transformer language model 2 | name: &name TransformerLM 3 | 4 | trainer: 5 | gpus: 1 # the number of gpus, 0 for CPU 6 | num_nodes: 1 7 | max_epochs: 2 8 | max_steps: 400 # precedence over max_epochs 9 | accumulate_grad_batches: 1 # accumulates grads every k batches 10 | amp_level: O2 # O1/O2 for mixed precision 11 | precision: 16 # Should be set to 16 for O1 and O2, default is 16 as PT ignores it when am_level is O0 12 | distributed_backend: ddp 13 | checkpoint_callback: False # Provided by exp_manager 14 | logger: False # Provided by exp_manager 15 | row_log_interval: 1 # Interval of logging. 16 | val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations 17 | resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. 18 | 19 | model: 20 | 21 | 22 | language_model: 23 | tokenizer: word 24 | special_tokens: 25 | unk_token: '' 26 | pad_token: '' 27 | bos_token: '' 28 | eos_token: '' 29 | vocab_file: ??? 
30 | hidden_size: 512 31 | num_layers: 6 32 | num_attn_heads: 8 33 | inner_size: 2048 34 | max_seq_length: 256 35 | embedding_dropout: 0 36 | ffn_dropout: 0 37 | attn_score_dropout: 0 38 | attn_layer_dropout: 0 39 | 40 | dataset: 41 | max_seq_length: 256 42 | num_workers: 2 # number of workers for data loaders 43 | drop_last: false # drops the last last batch if it is smaller than the batch size 44 | pin_memory: false # enables pin_memory feature of the data loaders 45 | 46 | train_ds: 47 | file_name: ??? # path to file with training data 48 | batch_size: 32 49 | shuffle: true 50 | num_samples: -1 # number of samples to be considered, -1 means all the dataset 51 | 52 | validation_ds: 53 | file_name: ??? # path to file with validation data 54 | batch_size: 32 55 | shuffle: false 56 | num_samples: -1 # number of samples to be considered, -1 means all the dataset 57 | predict_last_k: 64 58 | 59 | optim: 60 | name: adam 61 | lr: 1e-4 62 | betas: [0.9, 0.999] 63 | weight_decay: 0 64 | 65 | sched: 66 | name: WarmupAnnealing 67 | warmup_steps: null 68 | warmup_ratio: 0.05 69 | last_epoch: -1 70 | 71 | # pytorch lightning args 72 | monitor: val_loss 73 | reduce_on_plateau: false 74 | 75 | exp_manager: 76 | exp_dir: null # where to store logs and checkpoints 77 | name: *name # name of experiment 78 | create_tensorboard_logger: True 79 | create_checkpoint_callback: True 80 | 81 | hydra: 82 | run: 83 | dir: . 84 | job_logging: 85 | root: 86 | handlers: null 87 | -------------------------------------------------------------------------------- /examples/nlp/language_modeling/convert_weights_to_nemo1.0.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Converts BERT NeMo0.* checkpoints to NeMo1.0 format. 17 | """ 18 | 19 | from argparse import ArgumentParser 20 | 21 | import torch 22 | 23 | parser = ArgumentParser() 24 | parser.add_argument("--bert_encoder", required=True, help="path to BERT encoder, e.g. /../BERT-STEP-2285714.pt") 25 | parser.add_argument( 26 | "--bert_token_classifier", 27 | required=True, 28 | help="path to BERT token classifier, e.g. /../BertTokenClassifier-STEP-2285714.pt", 29 | ) 30 | parser.add_argument( 31 | "--bert_sequence_classifier", 32 | required=False, 33 | default=None, 34 | help="path to BERT sequence classifier, e.g /../SequenceClassifier-STEP-2285714.pt", 35 | ) 36 | parser.add_argument( 37 | "--output_path", required=False, default="converted_model.pt", help="output path to newly converted model" 38 | ) 39 | args = parser.parse_args() 40 | 41 | bert_in = torch.load(args.bert_encoder) 42 | tok_in = torch.load(args.bert_token_classifier) 43 | if args.bert_sequence_classifier: 44 | seq_in = torch.load(args.bert_sequence_classifier) 45 | 46 | new_dict = {} 47 | new_model = {"state_dict": new_dict} 48 | for k in bert_in: 49 | new_name = k.replace("bert.", "bert_model.") 50 | new_dict[new_name] = bert_in[k] 51 | 52 | for k in tok_in: 53 | new_name = "mlm_classifier." + k 54 | new_dict[new_name] = tok_in[k] 55 | 56 | if args.bert_sequence_classifier: 57 | for k in seq_in: 58 | new_name = "nsp_classifier." 
#!/bin/bash
#
# This file is adapted from
# https://github.com/salesforce/awd-lstm-lm/blob/master/getdata.sh
# Copyright by the AWD LSTM authors.
#
# Usage: get_wkt2.sh DATA_DIR
#
# Downloads the WikiText-2 archive into DATA_DIR, unpacks it, renames the three
# token files to train.txt / valid.txt / test.txt, rewrites the unknown-word
# marker to [UNK], and removes the downloaded archive.
#
# (The attribution above used to sit between triple quotes; Bash has no such
# comment syntax and tried to run that text as a command.)

DATA_DIR=$1
if [ -z "$DATA_DIR" ]; then
    echo "Usage: $0 DATA_DIR" >&2
    exit 1
fi

echo "- Downloading WikiText-2"

wget --continue -P "$DATA_DIR" https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
unzip -q "$DATA_DIR/wikitext-2-v1.zip" -d "$DATA_DIR"

# Stop here if the archive did not unpack: the mv/sed/rm below must not run in
# whatever directory the caller happened to be in.
cd "$DATA_DIR/wikitext-2" || exit 1

# NOTE(review): the search pattern was empty in the reviewed copy ("s//[UNK]/g"),
# which sed rejects; it is restored here as <unk>, the out-of-vocabulary token used
# by the WikiText files -- confirm against the upstream script.
mv wiki.train.tokens train.txt
sed -i -e "s/<unk>/[UNK]/g" train.txt
mv wiki.valid.tokens valid.txt
sed -i -e "s/<unk>/[UNK]/g" valid.txt
mv wiki.test.tokens test.txt
sed -i -e "s/<unk>/[UNK]/g" test.txt
cd ..
rm wikitext-2-v1.zip

echo "- WikiText-2 saved at $DATA_DIR/wikitext-2"
@hydra_runner(config_path="conf", config_name="transformer_lm_config")
def main(cfg: DictConfig) -> None:
    """Train a TransformerLMModel from the Hydra configuration.

    Args:
        cfg: configuration providing a ``trainer`` section (keyword arguments for
            ``pl.Trainer``), a ``model`` section and, optionally, ``exp_manager``.
    """
    logging.info(f'Config: {cfg.pretty()}')

    lightning_trainer = pl.Trainer(**cfg.trainer)

    # exp_manager receives None when the config has no "exp_manager" section.
    exp_manager_cfg = cfg.get("exp_manager", None)
    exp_manager(lightning_trainer, exp_manager_cfg)

    lm_model = TransformerLMModel(cfg.model, trainer=lightning_trainer)
    lightning_trainer.fit(lm_model)


if __name__ == '__main__':
    main()
class SquadDownloader:
    """Downloads the SQuAD v1.1 and v2.0 train/dev JSON files.

    Files are written below ``<save_path>/squad`` into the sub-directories
    ``v1.1`` and ``v2.0``; both directories are created on construction.

    Attributes (set in ``__init__``):
        save_path: ``save_path + '/squad'``.
        download_urls: mapping of source URL -> file path relative to ``save_path``.
    """

    def __init__(self, save_path):
        """
        Args:
            save_path: directory under which the ``squad`` folder is created.
        """
        self.save_path = save_path + '/squad'

        # exist_ok replaces the former "check, then create" sequence, which could fail
        # if the directory appeared between the two calls. Parents are created as needed.
        for version_dir in ('v1.1', 'v2.0'):
            os.makedirs(self.save_path + '/' + version_dir, exist_ok=True)

        self.download_urls = {
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json': 'v1.1/train-v1.1.json',
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json': 'v1.1/dev-v1.1.json',
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json': 'v2.0/train-v2.0.json',
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json': 'v2.0/dev-v2.0.json',
        }

    def download(self):
        """Fetch every file in ``download_urls`` that is not already on disk."""
        for url, file in self.download_urls.items():
            target = self.save_path + '/' + file

            logging.info('Downloading: %s', url)
            if os.path.isfile(target):
                logging.info('** Download file already exists, skipping download')
                continue

            # Write to a side file and rename once complete, so an interrupted transfer
            # never leaves a truncated file that the check above would later accept.
            partial = target + '.part'
            with urllib.request.urlopen(url) as response, open(partial, "wb") as handle:
                handle.write(response.read())
            os.replace(partial, target)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Download Squad')
    parser.add_argument(
        '--destDir',
        type=str,
        required=False,
        help='directory to store data',
        default=os.path.split(os.path.abspath(__file__))[0],
    )
    args = parser.parse_args()
    logging.info(args.destDir)
    squad_dl = SquadDownloader(args.destDir)
    squad_dl.download()
@hydra_runner(config_path="conf", config_name="question_answering_squad_config")
def main(cfg: DictConfig) -> None:
    """Train a QAModel, optionally test it, and optionally save it.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` (including ``validation_ds``, optionally ``test_ds`` and
            ``nemo_path``) and, optionally, ``exp_manager``.
    """
    logging.info(f'Config: {cfg.pretty()}')
    trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))

    # Relocate the prediction / n-best output files into the experiment log directory.
    # Dataset sections are looked up with .get() so that a config without a test set
    # is skipped here instead of failing on attribute access; previously test_ds was
    # dereferenced unconditionally, which made the later test_ds check ineffective.
    for ds_name in ('validation_ds', 'test_ds'):
        infer_dataset = cfg.model.get(ds_name, None)
        if infer_dataset is None:
            continue
        if infer_dataset.output_prediction_file is not None:
            infer_dataset.output_prediction_file = os.path.join(log_dir, infer_dataset.output_prediction_file)
        if infer_dataset.output_nbest_file is not None:
            infer_dataset.output_nbest_file = os.path.join(log_dir, infer_dataset.output_nbest_file)

    question_answering_model = QAModel(cfg.model, trainer=trainer)
    trainer.fit(question_answering_model)

    test_ds = cfg.model.get('test_ds', None)
    if test_ds is not None and test_ds.file is not None:
        # Testing uses a fresh trainer restricted to at most one GPU.
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        trainer = pl.Trainer(gpus=gpu)
        if question_answering_model.prepare_test(trainer):
            trainer.test(question_answering_model)

    if cfg.model.nemo_path:
        question_answering_model.save_to(cfg.model.nemo_path)


if __name__ == '__main__':
    main()
@hydra_runner(config_path="conf", config_name="SpeakerNet_recognition_3x2x512.yaml")
def main(cfg):
    """Train an EncDecSpeakerLabelModel, save it as ``spkr.nemo`` and optionally test it.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` and, optionally, ``exp_manager``.
    """
    logging.info(f'Hydra config: {cfg.pretty()}')

    fit_trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(fit_trainer, cfg.get("exp_manager", None))

    speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=fit_trainer)
    fit_trainer.fit(speaker_model)

    # The checkpoint is written one directory above the experiment log directory.
    speaker_model.save_to(os.path.join(log_dir, '..', 'spkr.nemo'))

    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return

    # Testing uses a fresh trainer restricted to at most one GPU.
    test_trainer = pl.Trainer(gpus=1 if cfg.trainer.gpus != 0 else 0)
    if speaker_model.prepare_test(test_trainer):
        test_trainer.test(speaker_model)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /examples/speaker_recognition/spkr_get_emb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | import pytorch_lightning as pl 18 | from omegaconf.listconfig import ListConfig 19 | from pytorch_lightning import seed_everything 20 | 21 | from nemo.collections.asr.models import ExtractSpeakerEmbeddingsModel 22 | from nemo.core.config import hydra_runner 23 | from nemo.utils import logging 24 | from nemo.utils.exp_manager import exp_manager 25 | 26 | """ 27 | To extract embeddings 28 | Place pretrained model in ${EXP_DIR}/${EXP_NAME} with spkr.nemo 29 | python spkr_get_emb.py --config-path='conf' --config-name='SpeakerNet_verification_3x2x512.yaml' \ 30 | +model.test_ds.manifest_filepath="" \ 31 | +model.test_ds.sample_rate=16000 \ 32 | +model.test_ds.labels=null \ 33 | +model.test_ds.batch_size=1 \ 34 | +model.test_ds.shuffle=False \ 35 | +model.test_ds.time_length=8 \ 36 | exp_manager.exp_name=${EXP_NAME} \ 37 | exp_manager.exp_dir=${EXP_DIR} \ 38 | trainer.gpus=1 39 | 40 | See https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb for notebook tutorial 41 | """ 42 | 43 | seed_everything(42) 44 | 45 | 46 | 
# The default config name previously pointed at "config", which does not exist in this
# example's conf directory; it now names the verification config that the usage notes
# of this script pass via --config-name. An explicit --config-name still overrides it.
@hydra_runner(config_path="conf", config_name="SpeakerNet_verification_3x2x512.yaml")
def main(cfg):
    """Restore a saved speaker model and run its test loop to extract embeddings.

    The model is read from ``<log_dir>/../spkr.nemo``, where ``log_dir`` is the value
    returned by ``exp_manager``.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model.test_ds`` and, optionally, ``exp_manager``.
    """
    logging.info(f'Hydra config: {cfg.pretty()}')

    # Multi-GPU requests are reduced to a single device.
    # NOTE(review): a string such as "0,1" would make int() raise here -- confirm
    # whether comma-separated GPU lists need to be supported.
    if (isinstance(cfg.trainer.gpus, ListConfig) and len(cfg.trainer.gpus) > 1) or (
        isinstance(cfg.trainer.gpus, (int, str)) and int(cfg.trainer.gpus) > 1
    ):
        logging.info("changing gpus to 1 to minimize DDP issues while extracting embeddings")
        cfg.trainer.gpus = 1
        cfg.trainer.distributed_backend = None

    trainer = pl.Trainer(**cfg.trainer)
    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))

    model_path = os.path.join(log_dir, '..', 'spkr.nemo')
    speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path)
    speaker_model.setup_test_data(cfg.model.test_ds)
    trainer.test(speaker_model)


if __name__ == '__main__':
    main()
9 | 10 | model: 11 | sigma: 1.0 12 | train_ds: 13 | dataset: 14 | cls: "nemo.collections.tts.data.datalayers.AudioDataset" 15 | params: 16 | manifest_filepath: ${train_dataset} 17 | max_duration: null 18 | min_duration: 0.1 19 | n_segments: 16000 20 | trim: false 21 | dataloader_params: 22 | drop_last: false 23 | shuffle: true 24 | batch_size: 12 25 | num_workers: 4 26 | 27 | validation_ds: 28 | dataset: 29 | cls: "nemo.collections.tts.data.datalayers.AudioDataset" 30 | params: 31 | manifest_filepath: ${validation_datasets} 32 | max_duration: null 33 | min_duration: 0.1 34 | n_segments: -1 35 | trim: false 36 | dataloader_params: 37 | drop_last: false 38 | shuffle: false 39 | batch_size: 12 40 | num_workers: 4 41 | 42 | preprocessor: 43 | cls: nemo.collections.asr.parts.features.FilterbankFeatures 44 | params: 45 | dither: 0.0 46 | nfilt: *n_mels 47 | frame_splicing: 1 48 | highfreq: *fmax 49 | log: true 50 | log_zero_guard_type: clamp 51 | log_zero_guard_value: 1e-05 52 | lowfreq: 0 53 | mag_power: 1.0 54 | n_fft: *n_fft 55 | 56 | # Waveglow is currently hardcoded to these values for window size and stride 57 | # Changing these parameters is not recommended 58 | n_window_size: 1024 59 | n_window_stride: 256 60 | 61 | normalize: null 62 | pad_to: 16 63 | pad_value: *pad_value 64 | preemph: null 65 | sample_rate: *sr 66 | stft_conv: true 67 | window: hann 68 | 69 | waveglow: 70 | cls: nemo.collections.tts.modules.waveglow.WaveGlowModule 71 | params: 72 | n_early_every: 4 73 | n_early_size: 2 74 | n_flows: 12 75 | n_group: 8 76 | n_mel_channels: *n_mels 77 | n_wn_channels: 512 78 | n_wn_layers: 8 79 | wn_kernel_size: 3 80 | 81 | optim: 82 | name: adam 83 | lr: 1e-4 84 | 85 | trainer: 86 | gpus: 1 # number of gpus 87 | max_epochs: ???
88 | num_nodes: 1 89 | distributed_backend: ddp 90 | accumulate_grad_batches: 1 91 | checkpoint_callback: False # Provided by exp_manager 92 | logger: False # Provided by exp_manager 93 | log_save_interval: 1000 94 | row_log_interval: 200 95 | check_val_every_n_epoch: 25 96 | precision: 16 97 | 98 | exp_manager: 99 | exp_dir: null 100 | name: *name 101 | create_tensorboard_logger: True 102 | create_checkpoint_callback: True 103 | -------------------------------------------------------------------------------- /examples/tts/glow_tts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@hydra_runner(config_path="conf", config_name="glow_tts")
def main(cfg):
    """Train a GlowTTSModel from the Hydra configuration.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` and, optionally, ``exp_manager``.
    """
    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager(pl_trainer, cfg.get("exp_manager", None))

    glow_tts_model = GlowTTSModel(cfg=cfg.model, trainer=pl_trainer)

    # Learning-rate and epoch-time logging callbacks are appended to the trainer's
    # callback list before fitting.
    pl_trainer.callbacks.extend([pl.callbacks.LearningRateLogger(), LogEpochTimeCallback()])
    pl_trainer.fit(glow_tts_model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
@hydra_runner(config_path="conf", config_name="squeezewave")
def main(cfg):
    """Train a SqueezeWaveModel from the Hydra configuration.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` and, optionally, ``exp_manager``.
    """
    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager(pl_trainer, cfg.get("exp_manager", None))

    squeezewave_model = SqueezeWaveModel(cfg=cfg.model, trainer=pl_trainer)

    # An epoch-time logging callback is appended to the trainer's callback list
    # before fitting.
    pl_trainer.callbacks.extend([LogEpochTimeCallback()])
    pl_trainer.fit(squeezewave_model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
@hydra_runner(config_path="conf", config_name="tacotron2")
def main(cfg):
    """Train a Tacotron2Model from the Hydra configuration.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` and, optionally, ``exp_manager``.
    """
    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager(pl_trainer, cfg.get("exp_manager", None))

    tacotron2_model = Tacotron2Model(cfg=cfg.model, trainer=pl_trainer)

    # Learning-rate and epoch-time logging callbacks are appended to the trainer's
    # callback list before fitting.
    pl_trainer.callbacks.extend([pl.callbacks.LearningRateLogger(), LogEpochTimeCallback()])
    pl_trainer.fit(tacotron2_model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
@hydra_runner(config_path="conf", config_name="waveglow")
def main(cfg):
    """Train a WaveGlowModel from the Hydra configuration.

    Args:
        cfg: configuration providing ``trainer`` (keyword arguments for ``pl.Trainer``),
            ``model`` and, optionally, ``exp_manager``.
    """
    pl_trainer = pl.Trainer(**cfg.trainer)
    exp_manager(pl_trainer, cfg.get("exp_manager", None))

    waveglow_model = WaveGlowModel(cfg=cfg.model, trainer=pl_trainer)

    # An epoch-time logging callback is appended to the trainer's callback list
    # before fitting.
    pl_trainer.callbacks.extend([LogEpochTimeCallback()])
    pl_trainer.fit(waveglow_model)


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import os 17 | 18 | from .package_info import ( 19 | __contact_emails__, 20 | __contact_names__, 21 | __description__, 22 | __download_url__, 23 | __homepage__, 24 | __keywords__, 25 | __license__, 26 | __package_name__, 27 | __repository_url__, 28 | __shortversion__, 29 | __version__, 30 | ) 31 | 32 | if "NEMO_PACKAGE_BUILDING" not in os.environ: 33 | from nemo import core 34 | from nemo import utils 35 | from nemo import collections 36 | -------------------------------------------------------------------------------- /nemo/collections/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /nemo/collections/asr/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.asr import data, losses, models, modules 16 | from nemo.package_info import __version__ 17 | 18 | # Set collection version equal to NeMo version. 19 | __version = __version__ 20 | 21 | # Authorship. 22 | __author__ = "NVIDIA Corporation" 23 | 24 | # Set collection name. 25 | __description__ = "Automatic Speech Recognition collection" 26 | -------------------------------------------------------------------------------- /nemo/collections/asr/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo/collections/asr/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo/collections/asr/losses/angularloss.py: -------------------------------------------------------------------------------- 1 | # ! /usr/bin/python 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class AngularSoftmaxLoss(Loss, Typing):
    """
    Computes ArcFace Angular softmax angle loss
    reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.pdf
    args:
        scale: scale value for cosine angle
        margin: margin value added to cosine angle
    """

    @property
    def input_types(self):
        """Input types definitions for AngularLoss.
        """
        return {
            "logits": NeuralType(('B', 'D'), LogitsType()),
            "labels": NeuralType(('B',), LabelsType()),
        }

    @property
    def output_types(self):
        """Output types definitions for AngularLoss.
        loss:
            NeuralType(None)
        """
        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self, scale=20.0, margin=1.35):
        super().__init__()

        # eps keeps the clamped value strictly inside (-1, 1) before acos is applied.
        self.eps = 1e-7
        self.scale = scale
        self.margin = margin

    @typecheck()
    def forward(self, logits, labels):
        """Compute the mean angular-margin loss over the batch.

        Args:
            logits: (B, D) tensor; the target-class entries are clamped to
                (-1, 1) and passed through acos, i.e. they are treated as cosines.
            labels: (B,) tensor of target class indices.

        Returns:
            Scalar tensor: negative mean of ``numerator - log(denominator)``.
        """
        batch_size, num_classes = logits.shape[0], logits.shape[1]
        rows = torch.arange(batch_size, device=labels.device)

        # logits[i, labels[i]] for every sample. This replaces the former
        # diagonal-of-a-(B, B)-matrix gather with a direct lookup of the same values.
        target = torch.clamp(logits[rows, labels], -1.0 + self.eps, 1 - self.eps)
        numerator = self.scale * torch.cos(torch.acos(target) + self.margin)

        # All logits except the target column, per row, in their original column order.
        # Boolean-mask selection is row-major, so this matches the former per-sample
        # slice-and-concatenate loop.
        keep = torch.ones_like(logits, dtype=torch.bool)
        keep[rows, labels] = False
        excl = logits[keep].view(batch_size, num_classes - 1)

        denominator = torch.exp(numerator) + torch.sum(torch.exp(self.scale * excl), dim=1)
        L = numerator - torch.log(denominator)
        return -torch.mean(L)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo/collections/asr/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from nemo.collections.asr.models.asr_model import ASRModel 16 | from nemo.collections.asr.models.classification_models import EncDecClassificationModel 17 | from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE 18 | from nemo.collections.asr.models.ctc_models import EncDecCTCModel 19 | from nemo.collections.asr.models.label_models import EncDecSpeakerLabelModel, ExtractSpeakerEmbeddingsModel 20 | -------------------------------------------------------------------------------- /nemo/collections/asr/models/asr_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class ASRModel(ModelPT, ABC):
    """Abstract ASR model: declares `transcribe` and aggregates loss / WER over epoch outputs."""

    @abstractmethod
    def transcribe(self, paths2audio_files: List[str], batch_size: int = 4) -> List[str]:
        """
        Takes paths to audio files and returns text transcription
        Args:
            paths2audio_files: paths to audio fragment to be transcribed
            batch_size: batch size argument; its exact use is defined by the implementing subclass

        Returns:
            transcription texts
        """
        pass

    def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0):
        """Averages 'val_loss' and computes the summed WER ratio over all step outputs."""

        def stacked(key):
            # Stack one entry of every step output into a single tensor.
            return torch.stack([step_output[key] for step_output in outputs])

        mean_loss = stacked('val_loss').mean()
        wer_numerator = stacked('val_wer_num').sum()
        wer_denominator = stacked('val_wer_denom').sum()
        logged = {'validation_loss': mean_loss, 'validation_wer': wer_numerator / wer_denominator}
        return {'val_loss': mean_loss, 'log': logged}

    def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0):
        """Averages 'test_loss' and computes the summed WER ratio over all step outputs."""

        def stacked(key):
            # Stack one entry of every step output into a single tensor.
            return torch.stack([step_output[key] for step_output in outputs])

        mean_loss = stacked('test_loss').mean()
        wer_numerator = stacked('test_wer_num').sum()
        wer_denominator = stacked('test_wer_denom').sum()
        logged = {'test_loss': mean_loss, 'test_wer': wer_numerator / wer_denominator}
        return {'test_loss': mean_loss, 'log': logged}
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.asr.modules.audio_preprocessing import ( 16 | AudioToMelSpectrogramPreprocessor, 17 | AudioToMFCCPreprocessor, 18 | CropOrPadSpectrogramAugmentation, 19 | SpectrogramAugmentation, 20 | ) 21 | from nemo.collections.asr.modules.conv_asr import ( 22 | ConvASRDecoder, 23 | ConvASRDecoderClassification, 24 | ConvASREncoder, 25 | SpeakerDecoder, 26 | ) 27 | -------------------------------------------------------------------------------- /nemo/collections/asr/parts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo/collections/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import nemo.collections.common.callbacks 16 | from nemo.collections.common import losses, parts, tokenizers 17 | from nemo.package_info import __version__ 18 | 19 | # Set collection version equal to NeMo version. 20 | __version = __version__ 21 | 22 | # Authorship. 23 | __author__ = "NVIDIA Corporation" 24 | 25 | # Set collection name. 26 | __description__ = "Common collection" 27 | -------------------------------------------------------------------------------- /nemo/collections/common/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class LogEpochTimeCallback(Callback):
    """Callback that records the wall-clock duration of every epoch, in seconds,
    through the trainer's logger under the key "epoch_time".
    """

    @rank_zero_only
    def on_epoch_start(self, trainer, pl_module):
        # Remember when the epoch began so on_epoch_end can compute the elapsed time.
        self.epoch_start = time.time()

    @rank_zero_only
    def on_epoch_end(self, trainer, pl_module):
        elapsed = time.time() - self.epoch_start
        metrics = {"epoch_time": elapsed}
        trainer.logger.log_metrics(metrics, step=trainer.global_step)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.common.losses.aggregator import AggregatorLoss 16 | from nemo.collections.common.losses.cross_entropy import CrossEntropyLoss 17 | from nemo.collections.common.losses.mse_loss import MSELoss 18 | from nemo.collections.common.losses.smoothed_cross_entropy import SmoothedCrossEntropyLoss 19 | from nemo.collections.common.losses.spanning_loss import SpanningLoss 20 | -------------------------------------------------------------------------------- /nemo/collections/common/losses/aggregator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class AggregatorLoss(Loss):
    """
    Sums several losses into one.

    Args:
        num_inputs: number of input losses
        weights: a list of coefficient for merging losses
    """

    @property
    def input_types(self):
        """Returns definitions of module input ports.

        One port per input loss, named "loss_1" ... "loss_<num_inputs>".
        """
        input_types = {}
        for i in range(self._num_losses):
            input_types["loss_" + str(i + 1)] = NeuralType(elements_type=LossType())

        return input_types

    @property
    def output_types(self):
        """Returns definitions of module output ports.
        """
        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self, num_inputs: int = 2, weights: List[float] = None):
        """
        Args:
            num_inputs: number of input losses
            weights: optional per-loss coefficients; weights[i] scales "loss_<i + 1>"

        Raises:
            ValueError: if weights is given and its length differs from num_inputs
        """
        super().__init__()
        self._num_losses = num_inputs
        if weights is not None and len(weights) != num_inputs:
            raise ValueError("Length of weights should be equal to the number of inputs (num_inputs)")

        self._weights = weights

    @typecheck()
    def forward(self, **kwargs):
        """
        Args:
            **kwargs: the losses to combine, keyed "loss_1" ... "loss_<num_inputs>"

        Returns:
            The (optionally weighted) sum of the given losses.
        """
        # Order by (length, name) so that "loss_10" follows "loss_9". A plain
        # lexicographic sort puts "loss_10" before "loss_2", which pairs the
        # weights with the wrong losses once there are ten or more inputs.
        ordered_names = sorted(kwargs, key=lambda name: (len(name), name))
        values = [kwargs[name] for name in ordered_names]
        loss = torch.zeros_like(values[0])
        for loss_idx, loss_value in enumerate(values):
            if self._weights is not None:
                loss = loss.add(loss_value, alpha=self._weights[loss_idx])
            else:
                loss = loss.add(loss_value)
        return loss
class MSELoss(nn.MSELoss, Serialization, Typing):
    """
    Mean squared error loss: torch's nn.MSELoss with typed input and output ports.
    """

    @property
    def input_types(self):
        """Returns definitions of module input ports.
        """
        batch_axes = ('B',)
        return {
            "preds": NeuralType(batch_axes, RegressionValuesType()),
            "labels": NeuralType(batch_axes, LabelsType()),
        }

    @property
    def output_types(self):
        """Returns definitions of module output ports.
        """
        loss_port = NeuralType(elements_type=LossType())
        return {"loss": loss_port}

    def __init__(self, reduction: str = 'mean'):
        """
        Args:
            reduction: type of the reduction over the batch
        """
        super().__init__(reduction=reduction)

    @typecheck()
    def forward(self, preds: Tensor, labels: Tensor) -> Tensor:
        """
        Args:
            preds: predicted values
            labels: ground truth values
        """
        loss = super().forward(preds, labels)
        return loss
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.common.metrics.classification_accuracy import TopKClassificationAccuracy, compute_topk_accuracy 16 | -------------------------------------------------------------------------------- /nemo/collections/common/parts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class MultiLayerPerceptron(torch.nn.Module):
    """
    A simple MLP that can either be used independently or put on top
    of pretrained models (such as BERT) and act as a classifier.
    Args:
        hidden_size (int): the size of each layer
        num_classes (int): number of output classes
        num_layers (int): number of layers
        activation (str): type of activations for layers in between
        log_softmax (bool): whether to add a log_softmax layer before output
    """

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 2,
        activation: str = 'relu',
        log_softmax: bool = True,
    ):
        super().__init__()
        # Stages are stored as attributes layer0, layer1, ...: hidden linear layers at
        # even slots, the activation callable (looked up on torch by name) at odd slots.
        slot = 0
        for _ in range(num_layers - 1):
            setattr(self, f'layer{slot}', torch.nn.Linear(hidden_size, hidden_size))
            setattr(self, f'layer{slot + 1}', getattr(torch, activation))
            slot += 2
        # Final projection onto the output classes.
        setattr(self, f'layer{slot}', torch.nn.Linear(hidden_size, num_classes))
        # Total number of stored stages (linear layers plus activations).
        self.layers = slot + 1
        self.log_softmax = log_softmax

    @property
    def last_linear_layer(self):
        """The final linear projection (the stage stored in the last slot)."""
        final_slot = self.layers - 1
        return getattr(self, f'layer{final_slot}')

    def forward(self, hidden_states):
        """Applies every stored stage in order, then log_softmax over the last dim if enabled."""
        current = hidden_states[:]
        for slot in range(self.layers):
            stage = getattr(self, f'layer{slot}')
            current = stage(current)

        return torch.log_softmax(current, dim=-1) if self.log_softmax else current
__all__ = ['if_exist', '_compute_softmax']


def if_exist(outfold: str, files: List[str]):
    """
    Checks that a folder exists and contains every one of the given files.
    Args:
        outfold: folder path
        files: list of file names relative to outfold
    Returns:
        True when outfold and all listed files exist, False otherwise
    """
    if not os.path.exists(outfold):
        return False
    return all(os.path.exists(f'{outfold}/{name}') for name in files)


def _compute_softmax(scores):
    """Compute softmax probability over raw logits.

    Returns an empty list for empty input, otherwise a list of probabilities
    in the same order as scores.
    """
    if not scores:
        return []

    # Subtract the largest score before exponentiating so math.exp cannot overflow.
    peak = max(scores)
    shifted_exps = [math.exp(score - peak) for score in scores]

    # Accumulate left to right, starting from 0.0.
    normalizer = 0.0
    for value in shifted_exps:
        normalizer += value

    return [value / normalizer for value in shifted_exps]
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.common.tokenizers.char_tokenizer import CharTokenizer 16 | from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer 17 | from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer 18 | from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec 19 | from nemo.collections.common.tokenizers.word_tokenizer import WordTokenizer 20 | -------------------------------------------------------------------------------- /nemo/collections/common/tokenizers/huggingface/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class TokenizerSpec(ABC):
    """
    Abstract interface for tokenizers; subclass it and implement every
    abstract conversion method to add a new tokenizer.
    """

    @abstractmethod
    def text_to_tokens(self, text):
        """Convert text to tokens; must be implemented by subclasses."""

    @abstractmethod
    def tokens_to_text(self, tokens):
        """Convert tokens to text; must be implemented by subclasses."""

    @abstractmethod
    def tokens_to_ids(self, tokens):
        """Convert tokens to ids; must be implemented by subclasses."""

    @abstractmethod
    def ids_to_tokens(self, ids):
        """Convert ids to tokens; must be implemented by subclasses."""

    @abstractmethod
    def text_to_ids(self, text):
        """Convert text to ids; must be implemented by subclasses."""

    @abstractmethod
    def ids_to_text(self, ids):
        """Convert ids to text; must be implemented by subclasses."""

    def add_special_tokens(self, special_tokens: List[str]):
        """Not implemented in the base class; always raises NotImplementedError."""
        raise NotImplementedError("To be implemented")

    @property
    def name(self):
        """Name of the concrete tokenizer class."""
        concrete_type = type(self)
        return concrete_type.__name__
class WordTokenizer(CharTokenizer):
    "Tokenizes at word boundary"

    def __init__(
        self,
        vocab_file: str,
        mask_token: Optional[str] = None,
        bos_token: Optional[str] = None,
        eos_token: Optional[str] = None,
        pad_token: Optional[str] = None,
        sep_token: Optional[str] = None,
        cls_token: Optional[str] = None,
        unk_token: Optional[str] = None,
    ):
        """
        Args:
            vocab_file: path to file with vocabulary whose entries are
                separated by newlines
            mask_token: mask token
            bos_token: the beginning of sequence token
            eos_token: the end of sequence token. Usually equal to sep_token
            pad_token: token to use for padding
            sep_token: token used for separating sequences
            cls_token: class token. Usually equal to bos_token
            unk_token: token to use for unknown tokens
        """
        # All arguments are forwarded unchanged to the CharTokenizer constructor.
        parent_kwargs = dict(
            vocab_file=vocab_file,
            mask_token=mask_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            unk_token=unk_token,
            sep_token=sep_token,
            cls_token=cls_token,
        )
        super().__init__(**parent_kwargs)

    def text_to_tokens(self, text):
        """Splits text on whitespace; words missing from the vocabulary become unk_token."""
        words = text.strip().split()
        return [word if word in self.vocab else self.unk_token for word in words]

    def ids_to_text(self, ids):
        """Drops ids found in self.special_tokens and joins the remaining tokens with spaces."""
        kept_ids = []
        for id_ in ids:
            if id_ in self.special_tokens:
                continue
            kept_ids.append(id_)
        return " ".join(self.ids_to_tokens(kept_ids))
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.cv import datasets, losses, models, modules 16 | from nemo.package_info import __version__ 17 | 18 | # Set collection version equal to NeMo version. 19 | __version = __version__ 20 | 21 | # Authorship. 22 | __author__ = "NVIDIA Corporation" 23 | 24 | # Set collection name. 25 | __description__ = "Computer Vision collection" 26 | -------------------------------------------------------------------------------- /nemo/collections/cv/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from nemo.collections.cv.datasets.mnist_dataset import MNISTDataset, MNISTDatasetConfig 16 | -------------------------------------------------------------------------------- /nemo/collections/cv/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.cv.losses.nll_loss import NLLLoss 16 | -------------------------------------------------------------------------------- /nemo/collections/cv/losses/nll_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@experimental
class NLLLoss(torch_NLLLoss, Serialization, Typing):
    """ Class representing a simple NLL loss. """

    def __init__(self, name: Optional[str] = None):
        """
        Constructor.

        Args:
            name: Name of the module (DEFAULT: None). Currently unused; kept so
                existing callers that pass it keep working.
        """
        # Call the base constructors.
        # Serialization.__init__(self, name=name)
        torch_NLLLoss.__init__(self)

    @property
    def input_types(self):
        """ Returns definitions of module input ports. """
        return {
            "predictions": NeuralType(axes=('B', 'ANY'), elements_type=LogprobsType()),
            # ('B',) is a real one-element tuple; the previous ('B') was just the string 'B'.
            "targets": NeuralType(axes=('B',), elements_type=ClassificationTarget()),
        }

    @property
    def output_types(self):
        """ Returns definitions of module output ports. """
        return {"loss": NeuralType(elements_type=LossType())}

    @typecheck()
    def forward(self, predictions, targets):
        """
        Computes the negative log-likelihood loss.

        Args:
            predictions: log-probabilities, batch-first
            targets: target class per sample

        Returns:
            The loss computed by torch's NLLLoss.forward on this instance.
        """
        # Run the torch implementation on this instance rather than building a
        # throwaway torch_NLLLoss() per call, so the instance's own settings are used.
        return torch_NLLLoss.forward(self, input=predictions, target=targets)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.cv.models.mnist_lenet5 import MNISTLeNet5, MNISTLeNet5Config 16 | -------------------------------------------------------------------------------- /nemo/collections/cv/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.cv.modules.lenet5 import LeNet5 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp import data, models, modules 16 | from nemo.package_info import __version__ 17 | 18 | # Set collection version equal to NeMo version. 19 | __version = __version__ 20 | 21 | # Authorship. 22 | __author__ = "NVIDIA Corporation" 23 | 24 | # Set collection name. 25 | __description__ = "Natural Language Processing collection" 26 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from nemo.collections.nlp.data.data_utils import * 16 | from nemo.collections.nlp.data.language_modeling.l2r_lm_dataset import L2RLanguageModelingDataset 17 | from nemo.collections.nlp.data.language_modeling.lm_bert_dataset import ( 18 | BertPretrainingDataset, 19 | BertPretrainingPreprocessedDataloader, 20 | ) 21 | from nemo.collections.nlp.data.question_answering_squad.qa_dataset import SquadDataset 22 | from nemo.collections.nlp.data.token_classification.token_classification_dataset import ( 23 | BertTokenClassificationDataset, 24 | BertTokenClassificationInferDataset, 25 | ) 26 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.data.data_utils.data_preprocessing import * 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/glue_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.data.glue_benchmark.glue_benchmark_dataset import GLUEDataset 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/intent_slot_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from nemo.collections.nlp.data.intent_slot_classification.intent_slot_classification_dataset import ( 17 | IntentSlotClassificationDataset, 18 | ) 19 | from nemo.collections.nlp.data.intent_slot_classification.intent_slot_classification_descriptor import ( 20 | IntentSlotDataDesc, 21 | ) 22 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/language_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.data.language_modeling.l2r_lm_dataset import L2RLanguageModelingDataset 16 | from nemo.collections.nlp.data.language_modeling.lm_bert_dataset import ( 17 | BertPretrainingDataset, 18 | BertPretrainingPreprocessedDataloader, 19 | ) 20 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/language_modeling/l2r_lm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
class L2RLanguageModelingDataset(Dataset):
    """
    Dataset for training and evaluating left-to-right language models.

    The whole corpus is tokenized once and flattened into a single array of
    token ids; every item is a window of ``max_seq_length`` ids together with
    the same window shifted by one position (the labels).

    Args:
        tokenizer: tokenizer, such as WordTokenizer or CharTokenizer
        dataset: path to data
        max_seq_length: maximum sequence length (in tokens) of input tensors
        batch_step: distance (in tokens) between two successive sequences of
            the text. By default, it is equal to max_seq_length which corresponds
            to splitting text into disjoint segments covering full dataset
    """

    def __init__(
        self,
        tokenizer: TokenizerSpec,
        dataset: str,
        max_seq_length: Optional[int] = 512,
        batch_step: Optional[int] = None,
    ):
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length
        # A falsy batch_step (None or 0) falls back to max_seq_length.
        self.batch_step = batch_step or self.max_seq_length
        ids = dataset_to_ids(dataset, tokenizer, add_bos_eos=False)
        # Flatten the per-sentence id lists into one contiguous array.
        self.ids = np.array([j for i in ids for j in i])

    def __len__(self):
        """
        Returns the number of full windows available in the corpus.

        The result is clamped to zero: when the corpus holds fewer than
        ``max_seq_length`` tokens the raw formula is negative, and Python's
        ``len()`` raises ``ValueError`` for negative lengths.
        """
        return max(0, (len(self.ids) - self.max_seq_length) // self.batch_step)

    def __getitem__(self, idx):
        """
        Returns the ``idx``-th window.

        Returns:
            Tuple ``(src_ids, src_mask, labels)`` where ``labels`` is the
            window shifted one token to the right and ``src_mask`` is a
            float32 array that is 0 where ``src_ids`` equals the tokenizer's
            ``pad_id`` and 1 elsewhere.
        """
        left = idx * self.batch_step
        right = left + self.max_seq_length
        src_ids = self.ids[left:right]
        labels = self.ids[left + 1 : right + 1]
        src_mask = (src_ids != self.tokenizer.pad_id).astype(np.float32)
        return src_ids, src_mask, labels
14 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from nemo.collections.nlp.data.text_classification.text_classification_dataset import ( 17 | TextClassificationDataset, 18 | calc_class_weights, 19 | ) 20 | -------------------------------------------------------------------------------- /nemo/collections/nlp/data/token_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /nemo/collections/nlp/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.metrics.classification_report import ClassificationReport 16 | from nemo.collections.nlp.metrics.perplexity import Perplexity 17 | -------------------------------------------------------------------------------- /nemo/collections/nlp/metrics/perplexity.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class Perplexity(TensorMetric):
    """
    Metric that converts a language model loss into perplexity
    by exponentiating it.
    """

    def __init__(self):
        # Register the metric under a fixed name with the base class.
        super().__init__(name="Perplexity")

    def forward(self, loss: torch.Tensor) -> torch.Tensor:
        """
        Args:
            loss: language model loss tensor.

        Returns:
            ``exp(loss)`` as a tensor.
        """
        perplexity = torch.exp(loss)
        return perplexity
14 | 15 | from nemo.collections.nlp.models.glue_benchmark.glue_benchmark_model import GLUEModel 16 | from nemo.collections.nlp.models.intent_slot_classification import IntentSlotClassificationModel 17 | from nemo.collections.nlp.models.language_modeling.bert_lm_model import BERTLMModel 18 | from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel 19 | from nemo.collections.nlp.models.question_answering.qa_model import QAModel 20 | from nemo.collections.nlp.models.text_classification import TextClassificationModel 21 | from nemo.collections.nlp.models.token_classification import PunctuationCapitalizationModel, TokenClassificationModel 22 | -------------------------------------------------------------------------------- /nemo/collections/nlp/models/glue_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.models.glue_benchmark.glue_benchmark_model import GLUEModel 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/models/glue_benchmark/metrics_for_glue.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Google AI Language Team Authors and 2 | # The HuggingFace Inc. team. 
__all__ = ['compute_metrics']


def _simple_accuracy(preds, labels):
    """Returns the fraction of positions at which ``preds`` equals ``labels``."""
    # Local import keeps this helper self-contained.
    import numpy as np

    # Coerce to arrays so that plain Python lists (as the type hints of the
    # public functions advertise) are compared element-wise; comparing two
    # lists with == yields a single bool that has no .mean().
    return (np.asarray(preds) == np.asarray(labels)).mean()


def accuracy(preds: List[int], labels: List[int]):
    """Returns ``{"acc": accuracy}`` for the given predictions and labels."""
    return {"acc": _simple_accuracy(preds, labels)}


def acc_and_f1(preds: List[int], labels: List[int]):
    """Returns accuracy and F1 score as ``{"acc": ..., "f1": ...}``."""
    acc = _simple_accuracy(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    return {"acc": acc, "f1": f1}


def mcc(preds: List[int], labels: List[int]):
    """Returns the Matthews correlation coefficient as ``{"mcc": ...}``."""
    return {"mcc": matthews_corrcoef(labels, preds)}


def pearson_and_spearman(preds: List[int], labels: List[int]):
    """Returns Pearson and Spearman correlations and their average."""
    pearson_corr = pearsonr(preds, labels)[0]
    spearman_corr = spearmanr(preds, labels)[0]
    return {"pearson": pearson_corr, "spearmanr": spearman_corr, "pear+spear av": (pearson_corr + spearman_corr) / 2}


def compute_metrics(task_name: str, preds: List[int], labels: List[int]) -> Dict[str, float]:
    """
    Computes metrics for GLUE tasks
    Args:
        task_name: GLUE task name
        preds: model predictions
        labels: golden labels
    Returns:
        metrics
    Raises:
        ValueError: if ``preds`` and ``labels`` differ in length
    """
    if len(preds) != len(labels):
        raise ValueError("Predictions and labels must have the same length")

    # Plain accuracy is the default; a few tasks use a task-specific metric.
    metric_fn = accuracy
    if task_name == 'cola':
        metric_fn = mcc
    elif task_name in ['mrpc', 'qqp']:
        metric_fn = acc_and_f1
    elif task_name == 'sts-b':
        metric_fn = pearson_and_spearman

    return metric_fn(preds, labels)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.models.language_modeling.bert_lm_model import BERTLMModel 16 | from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel 17 | -------------------------------------------------------------------------------- /nemo/collections/nlp/models/question_answering/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.models.question_answering.qa_model import QAModel 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/models/text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.models.text_classification.text_classification_model import TextClassificationModel 16 | -------------------------------------------------------------------------------- /nemo/collections/nlp/models/token_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from nemo.collections.nlp.models.token_classification.punctuation_capitalization_model import ( 17 | PunctuationCapitalizationModel, 18 | ) 19 | from nemo.collections.nlp.models.token_classification.token_classification_model import TokenClassificationModel 20 | -------------------------------------------------------------------------------- /nemo/collections/nlp/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from nemo.collections.nlp.modules.common import ( 17 | AlbertEncoder, 18 | BertEncoder, 19 | BertModule, 20 | DistilBertEncoder, 21 | MegatronBertEncoder, 22 | RobertaEncoder, 23 | SequenceClassifier, 24 | SequenceRegression, 25 | SequenceTokenClassifier, 26 | get_lm_model, 27 | get_megatron_lm_models_list, 28 | get_pretrained_lm_models_list, 29 | get_tokenizer, 30 | get_tokenizer_list, 31 | ) 32 | -------------------------------------------------------------------------------- /nemo/collections/nlp/modules/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Google AI Language Team Authors and 2 | # The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from nemo.collections.nlp.modules.common.bert_module import BertModule 18 | from nemo.collections.nlp.modules.common.huggingface import ( 19 | AlbertEncoder, 20 | BertEncoder, 21 | DistilBertEncoder, 22 | RobertaEncoder, 23 | ) 24 | from nemo.collections.nlp.modules.common.lm_utils import ( 25 | get_lm_model, 26 | get_megatron_lm_models_list, 27 | get_pretrained_lm_models_list, 28 | ) 29 | from nemo.collections.nlp.modules.common.megatron import MegatronBertEncoder 30 | from nemo.collections.nlp.modules.common.sequence_classifier import SequenceClassifier 31 | from nemo.collections.nlp.modules.common.sequence_regression import SequenceRegression 32 | from nemo.collections.nlp.modules.common.sequence_token_classifier import SequenceTokenClassifier 33 | from nemo.collections.nlp.modules.common.token_classifier import BertPretrainingTokenClassifier, TokenClassifier 34 | from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer, get_tokenizer_list 35 | -------------------------------------------------------------------------------- /nemo/collections/nlp/modules/common/huggingface/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.modules.common.huggingface.albert import AlbertEncoder 16 | from nemo.collections.nlp.modules.common.huggingface.auto import AutoModelEncoder 17 | from nemo.collections.nlp.modules.common.huggingface.bert import BertEncoder 18 | from nemo.collections.nlp.modules.common.huggingface.distilbert import DistilBertEncoder 19 | from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import ( 20 | get_huggingface_lm_model, 21 | get_huggingface_pretrained_lm_models_list, 22 | ) 23 | from nemo.collections.nlp.modules.common.huggingface.roberta import RobertaEncoder 24 | -------------------------------------------------------------------------------- /nemo/collections/nlp/modules/common/huggingface/albert.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Google AI Language Team Authors and 2 | # The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
class AlbertEncoder(AlbertModel, BertModule):
    """
    ALBERT encoder from the Huggingface transformers library, exposed as a NeMo module.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids):
        # Run the parent forward pass with explicit keywords and keep only
        # the first element of what it returns.
        outputs = super().forward(
            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids,
        )
        return outputs[0]
@experimental
class AutoModelEncoder(PreTrainedModel, BertModule):
    """
    Wraps around the Huggingface transformers implementation repository for easy use within NeMo.

    The concrete model is created by ``AutoModel.from_pretrained`` and kept in ``self.lm_model``;
    its class is recorded in ``self.type``.
    """

    def __init__(self, pretrained_model_name_or_path):
        """
        Args:
            pretrained_model_name_or_path: value passed unchanged to ``AutoModel.from_pretrained``.
        """
        BertModule.__init__(self)
        lm_model = AutoModel.from_pretrained(pretrained_model_name_or_path)
        # PreTrainedModel needs a config, so it is initialised only after the wrapped model exists.
        PreTrainedModel.__init__(self, config=lm_model.config)
        self.lm_model = lm_model
        self.type = type(lm_model)

    def forward(self, **kwargs):
        """
        Calls the wrapped model with the subset of ``kwargs`` that its ``forward`` accepts.

        Keyword arguments whose names are not keyword-passable parameters of
        ``self.lm_model.forward`` are silently dropped.

        Returns:
            The first element of the wrapped model's output.
        """
        # Local import: keeps this block self-contained without touching the file's import section.
        import inspect

        # Use the real signature instead of ``__code__.co_varnames``: ``co_varnames`` also lists
        # local variables of ``forward`` (which would then be passed through and raise TypeError),
        # and for a decorated ``forward`` it describes the wrapper rather than the model.
        keyword_kinds = (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY)
        accepted = {
            name
            for name, param in inspect.signature(self.lm_model.forward).parameters.items()
            if param.kind in keyword_kinds
        }
        filtered = {key: value for key, value in kwargs.items() if key in accepted}
        res = self.lm_model.forward(**filtered)[0]
        return res
class BertEncoder(BertModel, BertModule):
    """
    BERT encoder from the Huggingface transformers library, exposed as a NeMo module.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids):
        # Run the parent forward pass with explicit keywords and keep only
        # the first element of what it returns.
        outputs = super().forward(
            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids,
        )
        return outputs[0]
class DistilBertEncoder(DistilBertModel, BertModule):
    """
    DistilBERT encoder from the Huggingface transformers library, exposed as a NeMo module.
    """

    @typecheck()
    def forward(self, input_ids, attention_mask, token_type_ids=None):
        # Unlike most of the other Bert models, DistilBert does not use token_type_ids,
        # so the argument is accepted but never forwarded.
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]
class RobertaEncoder(RobertaModel, BertModule):
    """
    RoBERTa encoder from the Huggingface transformers library, exposed as a NeMo module.
    """

    @typecheck()
    def forward(self, input_ids, token_type_ids, attention_mask):
        # token_type_ids is part of the signature but is not passed on to the parent forward.
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
        return outputs[0]
All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.nlp.modules.common.transformer.transformer_decoders import * 16 | from nemo.collections.nlp.modules.common.transformer.transformer_encoders import * 17 | from nemo.collections.nlp.modules.common.transformer.transformer_generators import * 18 | from nemo.collections.nlp.modules.common.transformer.transformer_modules import * 19 | -------------------------------------------------------------------------------- /nemo/collections/nlp/parts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from nemo.collections.nlp.parts.utils_funcs import list2str, tensor2list 17 | -------------------------------------------------------------------------------- /nemo/collections/tts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import nemo.collections.tts.data 16 | import nemo.collections.tts.helpers 17 | import nemo.collections.tts.models 18 | -------------------------------------------------------------------------------- /nemo/collections/tts/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import nemo.collections.tts.data.datalayers 16 | -------------------------------------------------------------------------------- /nemo/collections/tts/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import nemo.collections.tts.helpers.helpers 16 | -------------------------------------------------------------------------------- /nemo/collections/tts/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class WaveGlowLoss(Loss):
    """ A Loss module that computes loss for WaveGlow
    """

    @property
    def input_types(self):
        """Neural types of the keyword arguments accepted by ``forward``."""
        return {
            "z": NeuralType(('B', 'flowgroup', 'T'), NormalDistributionSamplesType()),
            "log_s_list": NeuralType(('B', 'flowgroup', 'T'), VoidType()),  # TODO: Figure out a good typing
            "log_det_W_list": NeuralType(elements_type=VoidType()),  # TODO: Figure out a good typing
            "sigma": NeuralType(optional=True),
        }

    @property
    def output_types(self):
        """Neural type of the value returned by ``forward``."""
        return {
            "loss": NeuralType(elements_type=LossType()),
        }

    @typecheck()
    def forward(self, *, z, log_s_list, log_det_W_list, sigma=1.0):
        """
        Computes ``sum(z * z) / (2 * sigma**2) - sum_i sum(log_s_i) - sum_i log_det_W_i`` and divides
        it by ``z.size(0) * z.size(1) * z.size(2)``.

        Args:
            z: 3-dimensional tensor (its first three sizes are used for normalisation).
            log_s_list: iterable of tensors; each one is reduced with ``torch.sum``.
            log_det_W_list: indexable sequence; entry ``i`` is paired with ``log_s_list[i]``.
            sigma: scalar used in the ``2 * sigma * sigma`` denominator (default 1.0).

        Returns:
            The normalised loss value.
        """
        # Start both accumulators from 0 and add out-of-place. The previous code bound
        # ``log_det_W_total`` to ``log_det_W_list[0]`` and then used ``+=``, which modified the
        # caller's tensor in place; it also left both totals undefined for an empty list.
        log_s_total = 0
        log_det_W_total = 0
        for i, log_s in enumerate(log_s_list):
            log_s_total = log_s_total + torch.sum(log_s)
            log_det_W_total = log_det_W_total + log_det_W_list[i]

        loss = torch.sum(z * z) / (2 * sigma * sigma) - log_s_total - log_det_W_total
        return loss / (z.size(0) * z.size(1) * z.size(2))
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo.collections.tts.modules.denoiser import SqueezeWaveDenoiser 16 | from nemo.collections.tts.modules.glow_tts import GlowTTSModule 17 | from nemo.collections.tts.modules.squeezewave import SqueezeWaveModule 18 | from nemo.collections.tts.modules.tacotron2 import Decoder as Taco2Decoder 19 | from nemo.collections.tts.modules.tacotron2 import Encoder as Taco2Encoder 20 | from nemo.collections.tts.modules.tacotron2 import Postnet as Taco2Postnet 21 | from nemo.collections.tts.modules.waveglow import WaveGlowModule 22 | -------------------------------------------------------------------------------- /nemo/collections/tts/modules/denoiser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class SqueezeWaveDenoiser(torch.nn.Module):
    """
    Subtracts a precomputed bias spectrum from the STFT of an audio signal.

    The bias spectrum is obtained once, at construction time, by asking ``model`` to convert an
    all-zero spectrogram to audio with ``sigma=0.0`` and keeping index 0 along the third axis of
    that audio's STFT.
    """

    def __init__(self, model, n_mel=80, filter_length=1024, hop_length=256, win_length=1024, window='hann'):
        super().__init__()
        assert hasattr(model, 'squeezewave')

        device = model.device
        stft = STFTExactPad(filter_length=filter_length, hop_length=hop_length, win_length=win_length, window=window,)
        self.stft = stft.to(device)

        with torch.no_grad():
            zero_spect = torch.zeros((1, n_mel, 88)).to(device)
            bias_audio = model.convert_spectrogram_to_audio(spect=zero_spect, sigma=0.0)
            bias_transform, _ = self.stft.transform(bias_audio)
            # Keep index 0 of the third axis and re-insert that axis with size 1.
            self.bias_spect = bias_transform[:, :, 0].unsqueeze(2)

        # Reset mode to validation since `model.convert_spectrogram_to_audio` sets it to infer
        model.mode = OperationMode.validation
        model.squeezewave.mode = OperationMode.validation

    def forward(self, audio, strength=0.1):
        """
        Returns ``audio`` after subtracting ``strength`` times the stored bias spectrum from its
        STFT, clamping the result at zero from below, and inverting the STFT with the original angles.
        """
        spect, angles = self.stft.transform(audio)
        reduced = spect - self.bias_spect * strength
        reduced = torch.clamp(reduced, min=0.0)
        return self.stft.inverse(reduced, angles)
# String constants holding names used by NeMo; judging by the identifiers these are
# environment-variable names (NOTE(review): confirm against the code that reads them).
NEMO_ENV_VARNAME_ENABLE_COLORING = "NEMO_ENABLE_COLORING"
NEMO_ENV_VARNAME_REDIRECT_LOGS_TO_STDERR = "NEMO_REDIRECT_LOGS_TO_STDERR"
NEMO_ENV_VARNAME_TESTING = "NEMO_TESTING"  # Set to True to enable nemo.utils.logging's debug mode
NEMO_ENV_VARNAME_VERSION = "NEMO_EXPM_VERSION"  # Used for nemo.utils.exp_manager versioning
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from nemo.core.classes.common import FileIO, Model, Serialization, Typing, is_typecheck_enabled, typecheck 17 | from nemo.core.classes.dataset import Dataset, IterableDataset 18 | from nemo.core.classes.exportable import Exportable, ExportFormat 19 | from nemo.core.classes.loss import Loss 20 | from nemo.core.classes.modelPT import ModelPT 21 | from nemo.core.classes.module import NeuralModule 22 | -------------------------------------------------------------------------------- /nemo/core/classes/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class Loss(torch.nn.modules.loss._Loss, Typing, Serialization):
    """Base class for custom losses: subclass it to implement your own loss."""

    def __init__(self, **kwargs):
        # Pass every keyword argument on to the next initializer in the MRO.
        super().__init__(**kwargs)


class NeuralModule(Module, Typing, Serialization, FileIO):
    """
    Abstract base class defining the interface common to all PyTorch Neural Modules.
    """

    @property
    def num_weights(self):
        """Number of elements summed over all parameters that have ``requires_grad`` set."""
        total = 0
        for param in self.parameters():
            if param.requires_grad:
                total += param.numel()
        return total

    def input_example(self):
        """
        Hook for subclasses: override it when random inputs are not suitable.

        Returns:
            A tuple sample of valid input data. This base implementation returns None.
        """
        return None
@dataclass
class Config:
    """
    Base dataclass for NeMo configurations.

    Args:
        name: name of the module/dataset/loss/model object; used in serialization.
            Defaults to None.
    """

    # Optional object name; None when not provided.
    name: Optional[str] = None
@dataclass
class DataLoaderConfig:
    """
    Configuration of PyTorch DataLoader.

    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    .. note::
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
    """

    # The only field without a usable default: omegaconf's MISSING marker means the
    # value has to be supplied before it is read.
    batch_size: int = MISSING
    shuffle: bool = False
    sampler: Optional[Any] = None
    batch_sampler: Optional[Any] = None
    num_workers: int = 0
    collate_fn: Optional[Any] = None
    pin_memory: bool = False
    drop_last: bool = False
    timeout: int = 0
    worker_init_fn: Optional[Any] = None
    multiprocessing_context: Optional[Any] = None
@dataclass
class TrainerConfig:
    """
    Configuration of PyTorch Lightning Trainer.

    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    .. warning::
        Only a few parameters of the PTL trainer are covered for now. This needs to be discussed.

    .. note::
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch-lightning.readthedocs.io/en/latest/trainer.html#
    """

    # Every field has a default, so TrainerConfig() can be created without arguments.
    gradient_clip_val: float = 0
    process_position: int = 0
    num_nodes: int = 1
    num_processes: int = 1
    gpus: Optional[int] = None
    auto_select_gpus: bool = False
    log_gpu_memory: Optional[str] = None
    progress_bar_refresh_rate: int = 1
    check_val_every_n_epoch: int = 1
    fast_dev_run: bool = False
    max_epochs: int = 1000
    min_epochs: int = 1
    distributed_backend: Optional[str] = None
    max_steps: Optional[int] = None
    accumulate_grad_batches: int = 1
    amp_level: str = "O0"
class NeuralTypeComparisonResult(Enum):
    """The result of comparing two neural type objects for compatibility.

    Each member describes the outcome of ``A.compare_to(B)``; the trailing comment on
    every member states what that outcome means for A and B."""

    SAME = 0
    LESS = 1  # A is B
    GREATER = 2  # B is A
    DIM_INCOMPATIBLE = 3  # Resize connector might fix incompatibility
    TRANSPOSE_SAME = 4  # A transpose and/or converting between lists and tensors will make them same
    CONTAINER_SIZE_MISMATCH = 5  # A and B contain different number of elements
    INCOMPATIBLE = 6  # A and B are incompatible
    SAME_TYPE_INCOMPATIBLE_PARAMS = 7  # A and B are of the same type but parametrized differently
    UNCHECKED = 8  # type comparison wasn't done
14 | 15 | from nemo.core.optim.lr_scheduler import ( 16 | CosineAnnealing, 17 | InverseSquareRootAnnealing, 18 | PolynomialDecayAnnealing, 19 | PolynomialHoldDecayAnnealing, 20 | SquareAnnealing, 21 | SquareRootAnnealing, 22 | WarmupAnnealing, 23 | WarmupHoldPolicy, 24 | WarmupPolicy, 25 | prepare_lr_scheduler, 26 | ) 27 | from nemo.core.optim.novograd import Novograd 28 | from nemo.core.optim.optimizers import get_optimizer, parse_optimizer_args, register_optimizer 29 | -------------------------------------------------------------------------------- /nemo/package_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Individual components of the release version.
MAJOR = 1
MINOR = 0
PATCH = 0
PRE_RELEASE = 'b1'

# Use the following formatting: (major, minor, patch, pre-release)
VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)

# "major.minor.patch", without the pre-release tag.
__shortversion__ = '.'.join(str(component) for component in VERSION[:3])
# The short version with the pre-release tag appended directly (no separator).
__version__ = __shortversion__ + ''.join(VERSION[3:])

# Package metadata.
__package_name__ = 'nemo_toolkit'
__contact_names__ = 'NVIDIA'
__contact_emails__ = 'nemo-toolkit@nvidia.com'
__homepage__ = 'https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/'
__repository_url__ = 'https://github.com/nvidia/nemo'
__download_url__ = 'https://github.com/NVIDIA/NeMo/releases'
__description__ = 'NeMo - a toolkit for Conversational AI'
__license__ = 'Apache2'
__keywords__ = 'deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, tts, speech, language'
14 | 15 | 16 | from nemo.utils.nemo_logging import Logger as _Logger 17 | from nemo.utils.nemo_logging import LogMode as logging_mode 18 | from nemo.utils.lightning_logger_patch import add_memory_handlers_to_pl_logger 19 | 20 | logging = _Logger() 21 | add_memory_handlers_to_pl_logger() 22 | -------------------------------------------------------------------------------- /nemo/utils/decorators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from nemo.utils.decorators.deprecated import deprecated 17 | from nemo.utils.decorators.experimental import experimental 18 | from nemo.utils.decorators.port_docs import add_port_docs 19 | -------------------------------------------------------------------------------- /nemo/utils/decorators/deprecated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# Remember which deprecation warnings have been printed already (keyed by function name).
_PRINTED_WARNING = {}


def deprecated(wrapped=None, version=None, explanation=None):
    """ Decorator marking a function as deprecated and going to be removed.

    Can be applied bare (``@deprecated``) or with keyword arguments
    (``@deprecated(version=..., explanation=...)``). The first call of a decorated function
    logs a warning; later calls of a function with the same ``__name__`` stay silent.

    Args:
        wrapped: The function being decorated (None when the decorator is used with arguments).
        version: Version in which the function will be removed (optional).
        explanation: Additional explanation (optional), e.g. use method ``blabla instead``.
    """

    # Called with keyword arguments only: hand back a decorator that remembers them.
    if wrapped is None:
        return functools.partial(deprecated, version=version, explanation=explanation)

    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        """
        Logs the deprecation warning (only once per function name) and then calls the wrapped
        function with the original positional and keyword arguments.
        """
        func_name = wrapped.__name__

        # Warn only the first time a function with this name is called.
        if func_name not in _PRINTED_WARNING:
            _PRINTED_WARNING[func_name] = True

            # Assemble the message from its mandatory and optional pieces.
            pieces = ["Function ``{}`` is deprecated.".format(func_name)]
            if version is not None:
                pieces.append(" It is going to be removed in " + "the {} version.".format(version))
            if explanation is not None:
                pieces.append(" " + explanation)

            logging.warning("".join(pieces))

        # Delegate to the original function.
        return wrapped(*args, **kwargs)

    return wrapper(wrapped)
class NeMoBaseException(Exception):
    """ NeMo Base Exception. All exceptions created in NeMo should inherit from this class"""
def check_color_support():
    """
    Tells whether colored log output should be used.

    Coloring is never enabled on Windows platforms. Elsewhere it is enabled only when the
    environment variable named by NEMO_ENV_VARNAME_ENABLE_COLORING is parsed as true by
    ``get_envbool`` (the default is False).

    Returns:
        bool: True when coloring is requested and supported, False otherwise (never None).
    """
    # The env variable is not even consulted on Windows.
    if sys.platform.lower().startswith("win"):
        return False

    # Colors can be forced with an env variable.
    return bool(get_envbool(NEMO_ENV_VARNAME_ENABLE_COLORING, False))


def to_unicode(value):
    """
    Converts a string argument to a unicode string.

    If the argument is already a unicode string or None, it is returned unchanged.
    Otherwise it must be a byte string and is decoded as utf8.

    Args:
        value: ``str``, ``bytes`` or None.

    Returns:
        The value itself (``str``/None), the decoded text, or ``repr(value)`` when the
        bytes are not valid utf8.

    Raises:
        TypeError: if ``value`` is neither ``str``, ``bytes`` nor None.
    """
    if value is None or isinstance(value, str):
        return value

    if not isinstance(value, bytes):
        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))

    try:
        return value.decode("utf-8")
    except UnicodeDecodeError:
        # Undecodable input is reported through its repr instead of raising.
        return repr(value)
def is_global_rank_zero():
    """ Helper function to determine if the current process is global_rank 0 (the main process)

    The decision is taken from environment variables, in this order:

    1. ``RANK`` (set by torch.distributed.launch): when present, the process is global
       rank zero exactly when it equals 0.
    2. Otherwise the node rank is determined (``SLURM_NODEID`` when ``SLURM_NTASKS`` > 0,
       else ``NODE_RANK``, else ``GROUP_RANK``, else 0) and combined with ``LOCAL_RANK``
       (default 0): both have to be 0.

    NOTE(review): this assumes ``get_envint(name, None)`` returns None when the variable
    is not set -- confirm against nemo.utils.env_var_parsing.

    Returns:
        bool: True when this process is considered the global rank zero process.
    """
    # Try to get the pytorch RANK env var
    # RANK is set by torch.distributed.launch
    rank = get_envint("RANK", None)
    # Compare against None explicitly: a rank of 0 is a valid (and the interesting) value.
    if rank is not None:
        return rank == 0

    # If not set by pytorch, we need to determine node_rank
    def get_node_rank():
        # Use an equivalent of pytorch lightning's determine_ddp_node_rank()
        # First check if running on a slurm cluster
        # TODO: This check could probably be better
        if get_envint("SLURM_NTASKS", 0) > 0:
            return get_envint("SLURM_NODEID", 0)

        node_rank_env = get_envint("NODE_RANK", None)
        group_rank = get_envint("GROUP_RANK", None)
        # Take from NODE_RANK whenever available (including an explicit 0) ...
        if node_rank_env is not None:
            return node_rank_env
        # ... otherwise fall back to GROUP_RANK, and finally to node 0.
        if group_rank is not None:
            return group_rank
        return 0

    node_rank = get_node_rank()
    local_rank = get_envint("LOCAL_RANK", 0)
    return node_rank == 0 and local_rank == 0
# Registry of the handlers attached to pytorch_lightning's logger, keyed by role.
HANDLERS = {}
PATCHED = False


def add_memory_handlers_to_pl_logger():
    """
    Attaches two MemoryHandlers (message buffers) to pytorch_lightning's logger, unless
    HANDLERS already holds something. "memory_err" keeps only records above INFO level,
    "memory_all" keeps every record. This function is called in nemo.utils.__init__.py.
    The buffers are later flushed to files by add_filehandlers_to_pl_logger.
    """
    if HANDLERS:
        return

    err_buffer = MemoryHandler(-1)
    err_buffer.addFilter(lambda record: record.levelno > _logging.INFO)
    all_buffer = MemoryHandler(-1)

    HANDLERS["memory_err"] = err_buffer
    HANDLERS["memory_all"] = all_buffer
    pl._logger.addHandler(err_buffer)
    pl._logger.addHandler(all_buffer)


def add_filehandlers_to_pl_logger(all_log_file, err_log_file):
    """
    Attaches two FileHandlers to pytorch_lightning's logger. Called in nemo.utils.exp_manager().
    The first one writes every message to all_log_file, the second one writes messages above
    INFO level (WARNING and higher) to err_log_file. If the "memory_all" / "memory_err" buffers
    exist in HANDLERS, they are pointed at the matching file handler, closed (which flushes
    them) and removed from HANDLERS.
    """
    all_handler = _logging.FileHandler(all_log_file)
    HANDLERS["file"] = all_handler
    pl._logger.addHandler(all_handler)

    err_handler = _logging.FileHandler(err_log_file)
    HANDLERS["file_err"] = err_handler
    err_handler.addFilter(lambda record: record.levelno > _logging.INFO)
    pl._logger.addHandler(err_handler)

    # Drain each buffer into its file handler: "all" first, then "err".
    for buffer_key, file_key in (("memory_all", "file"), ("memory_err", "file_err")):
        if HANDLERS.get(buffer_key, None):
            buffered = HANDLERS[buffer_key]
            buffered.setTarget(HANDLERS[file_key])
            buffered.close()
            del HANDLERS[buffer_key]
#!/usr/bin/env bash
# Reinstalls NeMo in editable mode: removes the installed packages, installs every
# requirements/*.txt file, then installs the toolkit itself with all extras.
set -e

PIP=pip

echo 'Uninstalling stuff'
${PIP} uninstall -y nemo_toolkit

# Kept for legacy purposes
${PIP} uninstall -y nemo_asr
${PIP} uninstall -y nemo_nlp
${PIP} uninstall -y nemo_tts
${PIP} uninstall -y nemo_simple_gan

${PIP} install -U setuptools

# Iterate with a glob instead of parsing `ls` output.
# PIP_FLAGS is left unquoted on purpose so that it may be empty or carry several
# whitespace-separated options; the space after it keeps it apart from the next option.
for f in requirements/*.txt; do
  # Skip the literal pattern when no requirements file matches.
  [ -e "$f" ] || continue
  ${PIP} install ${PIP_FLAGS} --disable-pip-version-check --no-cache-dir -r "$f"
done

echo 'Installing stuff'
${PIP} install -e ".[all]"

echo 'All done!'
23 | -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.2 2 | onnx>=1.7.0 3 | pytorch-lightning==0.9.0 4 | python-dateutil 5 | torch 6 | wget 7 | wrapt 8 | ruamel.yaml 9 | scikit-learn 10 | omegaconf==2.0.1rc12 11 | hydra-core==1.0.0rc4 12 | transformers>=3.1.0 13 | -------------------------------------------------------------------------------- /requirements/requirements_asr.txt: -------------------------------------------------------------------------------- 1 | braceexpand 2 | editdistance 3 | frozendict 4 | inflect 5 | kaldi-io 6 | librosa 7 | marshmallow 8 | packaging 9 | num2words 10 | ruamel.yaml 11 | soundfile 12 | sox 13 | torch-stft 14 | unidecode 15 | webdataset 16 | kaldi-python-io 17 | scipy 18 | pandas 19 | -------------------------------------------------------------------------------- /requirements/requirements_cv.txt: -------------------------------------------------------------------------------- 1 | pillow 2 | torchvision 3 | -------------------------------------------------------------------------------- /requirements/requirements_docs.txt: -------------------------------------------------------------------------------- 1 | latexcodec 2 | sphinx_rtd_theme 3 | sphinxcontrib-bibtex 4 | wrapt 5 | -------------------------------------------------------------------------------- /requirements/requirements_nlp.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | h5py 3 | matplotlib>=3.3.2 4 | sentencepiece 5 | torchtext 6 | unidecode 7 | youtokentome 8 | numpy 9 | tqdm>=4.41.0 10 | rapidfuzz 11 | gdown 12 | megatron-lm>=1.1.4 13 | inflect 14 | -------------------------------------------------------------------------------- /requirements/requirements_simple_gan.txt: -------------------------------------------------------------------------------- 1 | 
matplotlib 2 | -------------------------------------------------------------------------------- /requirements/requirements_test.txt: -------------------------------------------------------------------------------- 1 | black==19.10b0 2 | isort[requirements] < 5 3 | parameterized 4 | pytest 5 | pytest-runner 6 | ruamel.yaml 7 | sphinx 8 | sphinxcontrib-bibtex 9 | wrapt 10 | wget 11 | wandb 12 | -------------------------------------------------------------------------------- /requirements/requirements_tts.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | pypinyin 3 | attrdict 4 | -------------------------------------------------------------------------------- /scripts/convasr_to_onnx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_parser():
    """Builds the command-line parser of the .nemo to ONNX conversion script.

    Returns:
        argparse.ArgumentParser with three required path options (--nemo_file, --onnx_encoder,
        --onnx_decoder) and the optional --model_type (one of 'asr', 'speech_label',
        'speaker'; default 'asr').
    """
    parser = argparse.ArgumentParser(description="Convert .nemo file to encoder decoder onnx files")

    # The required path options differ only in their flag and help text.
    required_paths = (
        ("--nemo_file", "Path to .nemo file"),
        ("--onnx_encoder", "Path to the onnx encoder output."),
        ("--onnx_decoder", "Path to the onnx decoder output."),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, default=None, type=str, required=True, help=help_text)

    parser.add_argument(
        "--model_type",
        default='asr',
        type=str,
        choices=['asr', 'speech_label', 'speaker'],
        help="Type of decoder used by the model.",
    )
    return parser


def main(
    nemo_file, onnx_encoder, onnx_decoder, model_type='asr',
):
    """Restores a model from a .nemo file and exports its encoder and decoder to ONNX.

    Args:
        nemo_file: path to the .nemo file to restore the model from.
        onnx_encoder: output path passed to the encoder's export().
        onnx_decoder: output path passed to the decoder's export() (opset version 10).
        model_type: 'asr', 'speech_label' or 'speaker'; selects the model class to restore.

    Raises:
        NameError: if model_type is none of the supported names.
    """
    # (model type, log message, model class) for every supported kind of model.
    supported = (
        ('asr', "Preparing encoder decoder for ASR model", EncDecCTCModel),
        ('speech_label', "Preparing encoder decoder for Speech Label Classification model", EncDecClassificationModel),
        ('speaker', "Preparing encoder decoder for Speaker Recognition model", EncDecSpeakerLabelModel),
    )
    for known_type, banner, model_cls in supported:
        if model_type == known_type:
            logging.info(banner)
            model = model_cls.restore_from(nemo_file)
            break
    else:
        raise NameError("Available model names are asr, speech_label and speaker ")

    logging.info("Writing onnx encoder and decoder onnx files")
    model.encoder.export(onnx_encoder)
    model.decoder.export(onnx_decoder, onnx_opset_version=10)
    logging.info("succesfully ported onnx files")


if __name__ == "__main__":
    cli_args = get_parser().parse_args()
    main(
        cli_args.nemo_file, cli_args.onnx_encoder, cli_args.onnx_decoder, model_type=cli_args.model_type,
    )
| ) 67 | -------------------------------------------------------------------------------- /scripts/freesound_download_resample/freesound_requirements.txt: -------------------------------------------------------------------------------- 1 | git+git://github.com/MTG/freesound-python.git 2 | requests 3 | requests_oauthlib 4 | joblib 5 | librosa 6 | sox -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [aliases] 16 | test=pytest 17 | 18 | # durations=0 will display the execution time of all tests, sorted from the slowest to the fastest one. 19 | # -vv will also display tests with duration = 0.00s 20 | [tool:pytest] 21 | addopts = --verbose --pyargs --durations=0 22 | markers = 23 | unit: marks unit test, i.e.
testing a single, well isolated functionality (deselect with '-m "not unit"') 24 | integration: marks test checking the elements when integrated into subsystems (deselect with '-m "not integration"') 25 | system: marks test working at the highest integration level (deselect with '-m "not system"') 26 | acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m "not acceptance"') 27 | docs: mark tests related to documentation (deselect with '-m "not docs"') 28 | skipduringci: marks tests that are skipped ci as they are addressed by Jenkins jobs but should be run to test user setups 29 | pleasefixme: marks tests that are broken and need fixing 30 | 31 | [isort] 32 | known_localfolder = nemo,tests 33 | sections = FUTURE,STDLIB,THIRDPARTY,LOCALFOLDER 34 | default_section = THIRDPARTY 35 | #TODO tests/unit/core/test_deploy_export.py gets screwed by isort 36 | skip = setup.py, docs/source/conf.py, nemo/utils/__init__.py, tests/unit/core/test_deploy_export.py 37 | -------------------------------------------------------------------------------- /tests/collections/asr/test_speaker_label_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class EncDecSpeechLabelModelTest(TestCase):
    """Construction smoke test for ``EncDecSpeakerLabelModel``."""

    @pytest.mark.unit
    def test_constructor(self):
        """Build the model from an in-memory config, then rebuild it from its own config dict."""
        preprocessor_cfg = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': {}}

        # Single encoder block with 512 filters; the decoder below is given feat_in=512 as well.
        conv_block = {
            'filters': 512,
            'repeat': 1,
            'kernel': [1],
            'stride': [1],
            'dilation': [1],
            'dropout': 0.0,
            'residual': False,
            'separable': False,
        }
        encoder_cfg = {
            'cls': 'nemo.collections.asr.modules.ConvASREncoder',
            'params': {'feat_in': 64, 'activation': 'relu', 'conv_mask': True, 'jasper': [conv_block]},
        }
        decoder_cfg = {
            'cls': 'nemo.collections.asr.modules.SpeakerDecoder',
            'params': {'feat_in': 512, 'num_classes': 2, 'pool_mode': 'xvector', 'emb_sizes': [1024]},
        }

        sections = {'preprocessor': preprocessor_cfg, 'encoder': encoder_cfg, 'decoder': decoder_cfg}
        model_config = DictConfig({name: DictConfig(section) for name, section in sections.items()})

        speaker_model = EncDecSpeakerLabelModel(cfg=model_config)
        speaker_model.train()
        # TODO: make proper config and assert correct number of weights

        # Round trip: the exported config dict must yield another EncDecSpeakerLabelModel.
        round_tripped = EncDecSpeakerLabelModel.from_config_dict(speaker_model.to_config_dict())
        self.assertTrue(isinstance(round_tripped, EncDecSpeakerLabelModel))
class ClassificationReportTests(TestCase):
    """Compares NeMo's ``ClassificationReport`` with scikit-learn's precision/recall/F1."""

    num_classes = 3
    label_ids = {'a': 0, 'b': 1, 'c': 2}

    @pytest.mark.unit
    def test_classification_report(self):
        """Rounded NeMo scores must equal the rounded scikit-learn scores for every averaging mode."""
        report = ClassificationReport(num_classes=self.num_classes, label_ids=self.label_ids)

        preds = torch.Tensor([0, 1, 1, 1, 2, 2, 0])
        labels = torch.Tensor([1, 0, 0, 1, 2, 1, 0])

        tp, fp, fn = report(preds, labels)

        def as_rounded_percent(sklearn_metric):
            # Scale the scikit-learn value by 100 and round it, returned as a 0-dim tensor
            # so it can be compared with the rounded NeMo value.
            return torch.Tensor([round(sklearn_metric * 100)])[0]

        for mode in ['macro', 'micro', 'weighted']:
            # NOTE(review): passed as (tp, fn, fp) although unpacked above as (tp, fp, fn);
            # confirm this matches get_precision_recall_f1's parameter order.
            precision, recall, f1 = report.get_precision_recall_f1(tp, fn, fp, mode)
            pr_sklearn, recall_sklearn, f1_sklearn, _ = precision_recall_fscore_support(labels, preds, average=mode)

            comparisons = (
                ('precision', precision, pr_sklearn),
                ('recall', recall, recall_sklearn),
                ('f1', f1, f1_sklearn),
            )
            for metric_name, nemo_value, sklearn_value in comparisons:
                self.assertEqual(
                    torch.round(nemo_value), as_rounded_percent(sklearn_value), f'wrong {metric_name} for {mode}'
                )
# Optional dependency probe: apex is only needed by the (currently disabled) AMP export path.
try:
    import apex

    apex_available = True
except Exception:
    apex_available = False


class TestMegatron(TestCase):
    """GPU-only checks for the Megatron language-model helpers in ``nemo.collections.nlp``."""

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_list_pretrained_models(self):
        """The list of pretrained language models must not be empty."""
        pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
        self.assertTrue(len(pretrained_lm_models) > 0)

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_get_pretrained_bert_345m_uncased_model(self):
        """Fetch ``megatron-bert-345m-uncased`` and check the returned encoder type."""
        model_name = "megatron-bert-345m-uncased"
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
        if torch.cuda.is_available():
            model = model.cuda()

        assert isinstance(model, nemo_nlp.modules.MegatronBertEncoder)

        # The AMP/ONNX export path is deliberately switched off (condition was `apex_available`).
        # NOTE(review): indentation was lost in the reviewed copy; the export block is assumed
        # to live under this disabled branch - confirm against the original file.
        if False:  # apex_available:
            model = apex.amp.initialize(model, opt_level="O2")
            with tempfile.TemporaryDirectory() as tmpdir:
                # Generate filename in the temporary directory (previously the path ignored
                # tmpdir, so the files were written to the current working directory).
                tmp_file_name = os.path.join(tmpdir, model_name + ".onnx")
                # Test export.
                model.export(tmp_file_name, check_trace=False)
                modelX = onnx.load(tmp_file_name)
                # Human-readable dump of the exported graph, written next to the .onnx file.
                with open(tmp_file_name + '.txt', 'w') as o:
                    o.write('Model :\n\n{}'.format(onnx.helper.printable_graph(modelX.graph)))
def classifier_export(obj):
    """Move ``obj`` to the GPU and export it to an ONNX file in a temporary directory.

    The file is named after the object's class and is removed together with the
    temporary directory when the function returns.

    Args:
        obj: module providing ``cuda()`` and ``export(output=...)``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, obj.__class__.__name__ + '.onnx')
        obj = obj.cuda()
        obj.export(output=filename)


class TestExportableClassifiers:
    """GPU-only ONNX export smoke tests for the NLP classifier heads, with 1, 2 and 4 layers each."""

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_token_classifier_export_to_onnx(self):
        """Export ``TokenClassifier``."""
        for num_layers in [1, 2, 4]:
            classifier_export(TokenClassifier(hidden_size=256, num_layers=num_layers, num_classes=16))

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_bert_pretraining_export_to_onnx(self):
        """Export ``BertPretrainingTokenClassifier``.

        This test previously instantiated ``TokenClassifier``, duplicating the test
        above and leaving the BERT pretraining head without export coverage.
        """
        for num_layers in [1, 2, 4]:
            # NOTE(review): assumes the constructor accepts the same keywords as
            # TokenClassifier (hidden_size, num_layers, num_classes) - confirm.
            classifier_export(
                BertPretrainingTokenClassifier(hidden_size=256, num_layers=num_layers, num_classes=16)
            )

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_sequence_token_classifier_export_to_onnx(self):
        """Export ``SequenceTokenClassifier``."""
        for num_layers in [1, 2, 4]:
            classifier_export(
                SequenceTokenClassifier(hidden_size=256, num_slots=8, num_intents=8, num_layers=num_layers)
            )

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_sequence_classifier_export_to_onnx(self):
        """Export ``SequenceClassifier``."""
        for num_layers in [1, 2, 4]:
            classifier_export(SequenceClassifier(hidden_size=256, num_classes=16, num_layers=num_layers))

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_sequence_regression_export_to_onnx(self):
        """Export ``SequenceRegression``."""
        for num_layers in [1, 2, 4]:
            classifier_export(SequenceRegression(hidden_size=256, num_layers=num_layers))
# Keyword arguments used to construct the WaveGlowModule under test.
wcfg = DictConfig(
    {
        "n_flows": 12,
        "n_group": 8,
        "n_mel_channels": 80,
        "n_early_every": 4,
        "n_early_size": 2,
        "n_wn_channels": 512,
        "n_wn_layers": 8,
        "wn_kernel_size": 3,
    }
)


class TestWaveGlow:
    """GPU-only ONNX export smoke test for ``WaveGlowModule``."""

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    def test_export_to_onnx(self):
        """Export a half-precision CUDA ``WaveGlowModule`` built from ``wcfg``."""
        model = WaveGlowModule(**wcfg).cuda().half()
        with tempfile.TemporaryDirectory() as tmpdir:
            # Generate filename in the temporary directory. The path previously ignored
            # tmpdir, so "waveglow.onnx" was written to the current working directory
            # and never cleaned up.
            tmp_file_name = os.path.join(tmpdir, "waveglow.onnx")
            # Test export.
            model.export(tmp_file_name, check_trace=False)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from os.path import exists, join 17 | 18 | import pytest 19 | 20 | 21 | class TestDataDir: 22 | @pytest.mark.unit 23 | def test_test_data_dir(self, test_data_dir): 24 | """Just a dummy test showing how to use the test_data_dir fixture.""" 25 | # test_data_dir contains the absolute path to nemo -> tests/.data 26 | assert exists(test_data_dir) 27 | assert exists(join(test_data_dir, "test_data.tar.gz")) 28 | -------------------------------------------------------------------------------- /tools/speech_data_explorer/README.md: -------------------------------------------------------------------------------- 1 | Speech Data Explorer 2 | -------------------- 3 | 4 | [Dash](https://plotly.com/dash/)-based tool for interactive exploration of ASR/TTS datasets. 5 | 6 | Features: 7 | - dataset's statistics (alphabet, vocabulary, duration-based histograms) 8 | - navigation across dataset (sorting, filtering) 9 | - inspection of individual utterances (waveform, spectrogram, audio player) 10 | 11 | Please make sure that requirements are installed.
Then run: 12 | ``` 13 | python data_explorer.py path_to_manifest.json 14 | ``` 15 | 16 | ![Speech Data Explorer](screenshot.png) 17 | -------------------------------------------------------------------------------- /tools/speech_data_explorer/requirements.txt: -------------------------------------------------------------------------------- 1 | librosa 2 | dash 3 | dash_html_components 4 | dash_bootstrap_components 5 | plotly 6 | dash_core_components 7 | dash_table 8 | numpy 9 | -------------------------------------------------------------------------------- /tools/speech_data_explorer/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/NeMo/4e95fc64ff0a083008d1564515ff1643df1d781e/tools/speech_data_explorer/screenshot.png --------------------------------------------------------------------------------