├── deepasr ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── utils.cpython-36.pyc │ │ ├── utils.cpython-37.pyc │ │ ├── __init__.cpython-36.pyc │ │ └── __init__.cpython-37.pyc │ ├── getmeta.py │ └── utils.py ├── vocab │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── alphabet.cpython-36.pyc │ │ └── alphabet.cpython-37.pyc │ ├── alphabet-en.txt │ └── alphabet.py ├── evaluate │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── distance.cpython-37.pyc │ │ └── evaluate.cpython-37.pyc │ ├── evaluate.py │ ├── activations.py │ └── distance.py ├── decoder │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── decoder.cpython-36.pyc │ │ └── decoder.cpython-37.pyc │ └── decoder.py ├── augmentation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── augmentation.cpython-36.pyc │ │ ├── augmentation.cpython-37.pyc │ │ ├── spec_augment.cpython-36.pyc │ │ └── spec_augment.cpython-37.pyc │ ├── augmentation.py │ └── spec_augment.py ├── __pycache__ │ └── __init__.cpython-37.pyc ├── features │ ├── __pycache__ │ │ ├── mfcc.cpython-36.pyc │ │ ├── mfcc.cpython-37.pyc │ │ ├── sigproc.cpython-36.pyc │ │ ├── sigproc.cpython-37.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── filter_banks.cpython-36.pyc │ │ ├── filter_banks.cpython-37.pyc │ │ ├── get_features.cpython-37.pyc │ │ ├── spectrogram.cpython-36.pyc │ │ ├── spectrogram.cpython-37.pyc │ │ ├── feature_extractor.cpython-36.pyc │ │ └── feature_extractor.cpython-37.pyc │ ├── __init__.py │ ├── filter_banks.py │ ├── get_features.py │ ├── feature_extractor.py │ ├── spectrogram.py │ ├── sigproc.py │ └── mfcc.py ├── model │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── deepspeech2.cpython-37.pyc │ │ └── deepasrnetwork1.cpython-37.pyc │ ├── __init__.py │ ├── compilemodel.py │ ├── 
deepasrnetwork1.py │ └── deepspeech2.py ├── pipeline │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── pipeline.cpython-36.pyc │ │ ├── pipeline.cpython-37.pyc │ │ ├── ctc_pipeline.cpython-36.pyc │ │ ├── ctc_pipeline.cpython-37.pyc │ │ └── get_pipeline.cpython-37.pyc │ ├── __init__.py │ ├── pipeline.py │ ├── get_pipeline.py │ └── ctc_pipeline.py └── __init__.py ├── setup.cfg ├── MANIFEST.in ├── setup.py ├── app.py ├── README.md ├── LICENSE └── DeepAsr_CTC_Pipeline.ipynb /deepasr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /deepasr/vocab/__init__.py: -------------------------------------------------------------------------------- 1 | from .alphabet import Alphabet 2 | -------------------------------------------------------------------------------- /deepasr/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluate import calculate_error_rates 2 | -------------------------------------------------------------------------------- /deepasr/decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import Decoder, GreedyDecoder, BeamSearchDecoder 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [egg_info] 2 | tag_build = 3 | tag_date = 0 4 | [metadata] 5 | description-file = README.md -------------------------------------------------------------------------------- /deepasr/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentation import Augmentation 2 | from .spec_augment import SpecAugment 3 | 
-------------------------------------------------------------------------------- /deepasr/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/mfcc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/mfcc.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/mfcc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/mfcc.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- 
/deepasr/model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/vocab/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/vocab/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/vocab/__pycache__/alphabet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/alphabet.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/vocab/__pycache__/alphabet.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/alphabet.cpython-37.pyc -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include app.py 4 | 5 | include deepasr/vocab/*.txt # Alphabets 6 | recursive-include deepasr *.py -------------------------------------------------------------------------------- /deepasr/decoder/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/decoder/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/decoder/__pycache__/decoder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/decoder.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/decoder/__pycache__/decoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/decoder.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/sigproc.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/sigproc.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/sigproc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/sigproc.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/evaluate/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/evaluate/__pycache__/distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/distance.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/evaluate/__pycache__/evaluate.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/evaluate.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/model/__pycache__/deepspeech2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/deepspeech2.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/pipeline.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/pipeline.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/pipeline.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/pipeline.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/filter_banks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/filter_banks.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/filter_banks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/filter_banks.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/get_features.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/get_features.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/spectrogram.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/spectrogram.cpython-36.pyc -------------------------------------------------------------------------------- 
/deepasr/features/__pycache__/spectrogram.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/spectrogram.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .compilemodel import compile_model 2 | from .deepspeech2 import get_deepspeech2 3 | from .deepasrnetwork1 import get_deepasrnetwork1 4 | -------------------------------------------------------------------------------- /deepasr/model/__pycache__/deepasrnetwork1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/deepasrnetwork1.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/ctc_pipeline.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/ctc_pipeline.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__pycache__/get_pipeline.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/get_pipeline.cpython-37.pyc 
-------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/augmentation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/augmentation.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/augmentation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/augmentation.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/spec_augment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/spec_augment.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/augmentation/__pycache__/spec_augment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/spec_augment.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/feature_extractor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/feature_extractor.cpython-36.pyc -------------------------------------------------------------------------------- /deepasr/features/__pycache__/feature_extractor.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/feature_extractor.cpython-37.pyc -------------------------------------------------------------------------------- /deepasr/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import Pipeline 2 | from .ctc_pipeline import CTCPipeline 3 | from .get_pipeline import load 4 | # from .get_pipeline import load_checkpoint 5 | -------------------------------------------------------------------------------- /deepasr/augmentation/augmentation.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import numpy as np 3 | 4 | 5 | class Augmentation: 6 | 7 | @abc.abstractmethod 8 | def __call__(self, batch_features: np.ndarray) -> np.ndarray: 9 | pass 10 | -------------------------------------------------------------------------------- /deepasr/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_extractor import FeaturesExtractor 2 | from .filter_banks import FilterBanks 3 | from .spectrogram import Spectrogram 4 | from . import mfcc 5 | from . import sigproc 6 | from .get_features import preprocess 7 | -------------------------------------------------------------------------------- /deepasr/__init__.py: -------------------------------------------------------------------------------- 1 | from . import augmentation 2 | from . import decoder 3 | from . import evaluate 4 | from . import features 5 | from . import model 6 | from . import pipeline 7 | from . import utils 8 | from . 
import vocab 9 | 10 | # Version of the deepasr package 11 | __version__ = "0.1.1" 12 | -------------------------------------------------------------------------------- /deepasr/utils/getmeta.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tinytag import TinyTag 3 | 4 | 5 | # https://pypi.org/project/tinytag/ 6 | 7 | def get_file_tags(audio_file): 8 | tag = TinyTag.get(audio_file) 9 | print(tag.filesize, '|', tag.audio_offset, "|", tag.bitrate, "|", tag.channels, "|", tag.duration, "|", 10 | tag.samplerate, "|", tag.audio_offset) 11 | 12 | 13 | if __name__ == "__main__": 14 | get_file_tags(sys.argv[1]) 15 | -------------------------------------------------------------------------------- /deepasr/vocab/alphabet-en.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Alphabet is the list of valid characters. There are two special characters: 3 | # - space: at the beginning 4 | # - blank: added by default as the last char 5 | # 6 | # To comment out a line, use `#` 7 | # 8 | 9 | a 10 | b 11 | c 12 | d 13 | e 14 | f 15 | g 16 | h 17 | i 18 | j 19 | k 20 | l 21 | m 22 | n 23 | o 24 | p 25 | q 26 | r 27 | s 28 | t 29 | u 30 | v 31 | w 32 | x 33 | y 34 | z 35 | ' 36 | 37 | # End of vocabulary 38 | # The last (non-comment) blank line represents the blank token -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open('README.md') as f: 4 | long_description = f.read() 5 | 6 | setuptools.setup( 7 | name="deepasr", 8 | version="0.1.2", 9 | author="Sai Kumar Yava", 10 | author_email="saikumar.geek@gmail.com", 11 | description="Keras(Tensorflow) implementations of Automatic Speech Recognition", 12 | long_description=long_description, 13 | long_description_content_type='text/markdown', 14 | 
url="https://github.com/scionoftech/DeepAsr", 15 | include_package_data=True, 16 | packages=['deepasr'], 17 | keywords=['deepspeech', 'asr', 'speech recognition', 'speech to text'], 18 | license='GNU', 19 | install_requires=['tensorflow>=2.0', 'pandas', 'tables', 'scipy', 'librosa'], 20 | python_requires='>=3.6', 21 | ) 22 | -------------------------------------------------------------------------------- /deepasr/features/filter_banks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .mfcc import fbank 3 | from . import feature_extractor 4 | 5 | 6 | class FilterBanks(feature_extractor.FeaturesExtractor): 7 | 8 | def __init__(self, features_num: int, samplerate: int = 16000, is_standardization=True, **kwargs): 9 | self.features_num = features_num 10 | self.is_standardization = is_standardization 11 | self.params = kwargs 12 | self.samplerate = samplerate 13 | 14 | def make_features(self, audio: np.ndarray) -> np.ndarray: 15 | """ Use `python_speech_features` lib to extract log filter banks from 16 | the features file. 
""" 17 | audio = self.normalize(audio.astype(np.float32)) 18 | audio = (audio * np.iinfo(np.int16).max).astype(np.int16) 19 | feat, energy = fbank( 20 | audio, nfilt=self.features_num, samplerate=self.samplerate, **self.params 21 | ) 22 | features = np.log(feat) 23 | return self.standardize(features) if self.is_standardization else features 24 | -------------------------------------------------------------------------------- /deepasr/features/get_features.py: -------------------------------------------------------------------------------- 1 | from .filter_banks import FilterBanks 2 | from .spectrogram import Spectrogram 3 | 4 | 5 | def preprocess(feature_type: str = 'fbank', features_num: int = 161, 6 | samplerate: int = 16000, 7 | winlen: float = 0.02, 8 | winstep: float = 0.01, 9 | winfunc=None, 10 | is_standardization=True, 11 | pad_audio_to: int = 0): 12 | ''' This method extracts the audio features based on fbank or spectrogram ''' 13 | if feature_type == 'fbank': 14 | features_extractor = FilterBanks(features_num=features_num, samplerate=samplerate, winlen=winlen, 15 | winstep=winstep, winfunc=winfunc, 16 | is_standardization=is_standardization) 17 | return features_extractor 18 | elif feature_type == 'spectrogram': 19 | features_extractor = Spectrogram( 20 | features_num=features_num, 21 | samplerate=samplerate, 22 | winlen=winlen, 23 | winstep=winstep, 24 | winfunc=winfunc, 25 | pad_audio_to=pad_audio_to 26 | ) 27 | return features_extractor 28 | -------------------------------------------------------------------------------- /deepasr/pipeline/pipeline.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | import numpy as np 4 | import pandas as pd 5 | from tensorflow import keras 6 | import sys 7 | 8 | sys.path.append("..") 9 | from deepasr.decoder import Decoder 10 | from deepasr.features import FeaturesExtractor 11 | from deepasr.vocab import Alphabet 12 | 13 | 14 | class 
Pipeline: 15 | 16 | @property 17 | @abc.abstractmethod 18 | def alphabet(self) -> Alphabet: 19 | pass 20 | 21 | @property 22 | @abc.abstractmethod 23 | def features_extractor(self) -> FeaturesExtractor: 24 | pass 25 | 26 | @property 27 | @abc.abstractmethod 28 | def model(self) -> keras.Model: 29 | pass 30 | 31 | @property 32 | @abc.abstractmethod 33 | def decoder(self) -> Decoder: 34 | pass 35 | 36 | @abc.abstractmethod 37 | def fit(self, 38 | train_dataset: pd.DataFrame, 39 | val_dataset: pd.DataFrame, 40 | prepared_features=False, 41 | **kwargs) -> keras.callbacks.History: 42 | pass 43 | 44 | @abc.abstractmethod 45 | def predict(self, batch_audio: List[np.ndarray], **kwargs) -> List[str]: 46 | pass 47 | 48 | @abc.abstractmethod 49 | def save(self, directory: str): 50 | pass 51 | -------------------------------------------------------------------------------- /deepasr/pipeline/get_pipeline.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from deepasr.utils import load_data 4 | from deepasr.pipeline import CTCPipeline 5 | from deepasr.model import compile_model 6 | 7 | 8 | def load(directory: str): 9 | """ Load each component of the CTC pipeline. 
""" 10 | 11 | _label_len = load_data(os.path.join(directory, 'label_len.bin')) 12 | _optimizer = load_data(os.path.join(directory, 'optimizer.bin')) 13 | _network = tf.keras.models.load_model(os.path.join(directory, 'network.h5')) 14 | _model = _network 15 | _model = compile_model(_model, _optimizer, _label_len) 16 | _model.load_weights(os.path.join(directory, 'model_weights.h5')) 17 | _alphabet = load_data(os.path.join(directory, 'alphabet.bin')) 18 | _decoder = load_data(os.path.join(directory, 'decoder.bin')) 19 | _features_extractor = load_data( 20 | os.path.join(directory, 'feature_extractor.bin')) 21 | _multi_gpu_flag = load_data(os.path.join(directory, 'multi_gpu_flag.bin')) 22 | _sample_rate = load_data(os.path.join(directory, 'sample_rate.bin')) 23 | _mono = load_data(os.path.join(directory, 'mono.bin')) 24 | 25 | pipeline = CTCPipeline( 26 | alphabet=_alphabet, features_extractor=_features_extractor, model=_model, optimizer=_optimizer, 27 | decoder=_decoder, sample_rate=_sample_rate, mono=_mono, label_len=_label_len, multi_gpu=_multi_gpu_flag, 28 | temp_model=_network 29 | ) 30 | return pipeline 31 | -------------------------------------------------------------------------------- /deepasr/model/compilemodel.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import * 2 | from tensorflow.keras.models import Model 3 | import tensorflow.keras.backend as K 4 | import logging 5 | 6 | logger = logging.getLogger('asr.pipeline') 7 | 8 | 9 | def ctc_loss(args): 10 | """ The CTC loss using TensorFlow's `ctc_loss`. """ 11 | y_pred, labels, input_length, label_length = args 12 | return K.ctc_batch_cost(labels, y_pred, input_length, label_length) 13 | 14 | 15 | def compile_model(_model, _optimizer, label_len=None): 16 | """ The compiled model means the model configured for training. """ 17 | 18 | input_data = _model.inputs[0] 19 | y_pred = _model.outputs[0] 20 | 21 | # your ground truth data. 
The data you are going to compare with the model's outputs in training 22 | labels = Input(name='the_labels', shape=[label_len], dtype='float32') 23 | # the length (in steps, or chars in this case) of each sample (sentence) in the y_pred tensor 24 | input_length = Input(name='input_length', shape=[1], dtype='float32') 25 | # the length (in steps, or chars in this case) of each sample (sentence) in the y_true 26 | label_length = Input(name='label_length', shape=[1], dtype='float32') 27 | output = Lambda(ctc_loss, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) 28 | _model = Model(inputs=[input_data, labels, input_length, label_length], outputs=output, 29 | name="DeepAsr") 30 | _model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=_optimizer, 31 | metrics=['accuracy']) 32 | 33 | # _model.summary() 34 | logger.info("Model is successfully compiled") 35 | return _model 36 | -------------------------------------------------------------------------------- /deepasr/decoder/decoder.py: -------------------------------------------------------------------------------- 1 | import abc 2 | # import itertools 3 | from typing import List 4 | import numpy as np 5 | from tensorflow.keras import backend as K 6 | 7 | 8 | # https://www.tensorflow.org/api_docs/python/tf/keras/backend/ctc_decode 9 | 10 | class Decoder: 11 | 12 | @abc.abstractmethod 13 | def __call__(self, batch_logits: np.ndarray, input_length: int) -> List[np.ndarray]: 14 | pass 15 | 16 | 17 | class GreedyDecoder: 18 | 19 | def __call__(self, batch_logits: np.ndarray, input_length: int) -> List[np.ndarray]: 20 | """ Decode the best guess from logits using the greedy algorithm. 
""" 21 | # Choose the class with maximum probability 22 | # best_candidates = np.argmax(batch_logits, axis=2) 23 | # Merge repeated chars 24 | # decoded = [np.array([k for k, _ in itertools.groupby(best_candidate)]) 25 | # for best_candidate in best_candidates] 26 | decoded = np.array( 27 | (K.eval(K.ctc_decode(batch_logits, [input_length], greedy=True)[0][0])).flatten().tolist()) 28 | return [decoded] 29 | 30 | 31 | class BeamSearchDecoder: 32 | 33 | def __init__(self, beam_width: int, top_paths: int): 34 | self.beam_width = beam_width 35 | self.top_paths = top_paths 36 | 37 | def __call__(self, batch_logits: np.ndarray, input_length: int, **kwargs) -> List[ 38 | np.ndarray]: 39 | """ Decode the best guess from logits using the beam search algorithm. """ 40 | decoded = np.array((K.eval( 41 | K.ctc_decode(batch_logits, [input_length], greedy=False, beam_width=self.beam_width, 42 | top_paths=self.top_paths)[0][ 43 | 0])).flatten().tolist()) 44 | return [decoded] 45 | -------------------------------------------------------------------------------- /deepasr/features/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List, Tuple 3 | import numpy as np 4 | 5 | 6 | class FeaturesExtractor: 7 | 8 | def __init__(self): 9 | self.features_shape = None 10 | 11 | def __call__(self, batch_audio: List[np.ndarray]) -> np.ndarray: 12 | """ Extract features from a batch of audio arrays. """ 13 | features = [self.make_features(audio) for audio in batch_audio] 14 | self.features_shape = max(features, key=len).shape 15 | X = self.align(features, self.features_shape) 16 | return X.astype(np.float16) 17 | 18 | @abc.abstractmethod 19 | def make_features(self, audio: np.ndarray) -> np.ndarray: 20 | pass 21 | 22 | @staticmethod 23 | def standardize(features: np.ndarray) -> np.ndarray: 24 | """ Standardize globally, independently of features. 
""" 25 | mean = np.mean(features) 26 | std = np.std(features) 27 | return (features - mean) / std 28 | 29 | @staticmethod 30 | def normalize(audio: np.ndarray): 31 | """ Normalize float32 signal to [-1, 1] range. """ 32 | gain = 1.0 / (np.max(np.abs(audio)) + 1e-5) 33 | return audio * gain 34 | 35 | @staticmethod 36 | def align(arrays: list, features_shape: Tuple, default=0) -> np.ndarray: 37 | """ Pad arrays with `default` along the time dimension. Return a single 38 | array (batch_size, time, features). """ 39 | # max_array = max(arrays, key=len) 40 | X = np.full(shape=[len(arrays), *features_shape], 41 | fill_value=default, dtype=float) 42 | for index, array in enumerate(arrays): 43 | time_dim, features_dim = array.shape 44 | X[index, :time_dim] = array 45 | return X 46 | -------------------------------------------------------------------------------- /deepasr/features/spectrogram.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from . import sigproc 4 | from . import feature_extractor 5 | 6 | 7 | class Spectrogram(feature_extractor.FeaturesExtractor): 8 | 9 | def __init__(self, 10 | features_num: int, 11 | samplerate: int, 12 | winlen: float, 13 | winstep: float, 14 | winfunc=None, 15 | is_standardization=True, 16 | pad_audio_to: int = 0): 17 | self.features_num = features_num 18 | self.winfunc = winfunc 19 | self.frame_len = int(winlen * samplerate) 20 | self.frame_step = int(winstep * samplerate) 21 | self.is_standardization = is_standardization 22 | self.pad_to = pad_audio_to 23 | 24 | def make_features(self, audio: np.ndarray) -> np.ndarray: 25 | """ Use `python_speech_features` lib to extract log-spectrograms. 
""" 26 | audio = self.normalize(audio.astype(np.float32)) 27 | audio = (audio * np.iinfo(np.int16).max).astype(np.int16) 28 | audio = self.pad(audio) if self.pad_to else audio 29 | frames = sigproc.framesig( 30 | audio, self.frame_len, self.frame_step, self.winfunc 31 | ) 32 | features = sigproc.logpowspec( 33 | frames, self.frame_len, norm=1 34 | ) 35 | features = features[:, :self.features_num] # Cut high frequency part 36 | return self.standardize(features) if self.is_standardization else features 37 | 38 | def pad(self, audio: np.ndarray) -> np.ndarray: 39 | """ Padding the signal is required if you use mixed precision. """ 40 | length = 1 + int((len(audio) - self.frame_len) // self.frame_step + 1) 41 | pad_size = (self.pad_to - length % self.pad_to) * self.frame_step 42 | return np.pad(audio, (0, pad_size), mode='constant') 43 | -------------------------------------------------------------------------------- /deepasr/model/deepasrnetwork1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow import keras 4 | from tensorflow.keras.models import Model 5 | from tensorflow.keras.layers import * 6 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 7 | 8 | 9 | def get_deepasrnetwork1(input_dim=None, output_dim=29, 10 | is_mixed_precision=True, random_state=1) -> keras.Model: 11 | """ 12 | 13 | input_dim: int, a multiple of 4 14 | output_dim: number of letters in the vocabulary 15 | 16 | """ 17 | if is_mixed_precision: 18 | policy = mixed_precision.Policy('float32') 19 | mixed_precision.set_policy(policy) 20 | 21 | np.random.seed(random_state) 22 | tf.random.set_seed(random_state) 23 | 24 | # the input 25 | input_data = Input(name='the_input', shape=(None, input_dim), dtype='float32') 26 | 27 | # Batch normalize 28 | bn1 = BatchNormalization(axis=-1, name='BN_1')(input_data) 29 | 30 | # 1D Convs 31 | conv = Conv1D(filters=220, 
kernel_size=5, 
class SpecAugment:
    """ SpecAugment: A Simple Data Augmentation Method.

    Replaces random bands of channels and random runs of time steps of a
    (time, channels) feature matrix with the per-channel mean.
    """

    def __init__(self,
                 F: int = None,
                 mf: int = None,
                 Tmin: int = None,
                 Tmax: int = None,
                 mt: int = None):
        """ Store the masking parameters.

        F: maximal width (in channels) of one frequency mask
        mf: number of frequency masks
        Tmin: minimal length (in time steps) of one time mask, 0 if omitted
        Tmax: maximal length (in time steps) of one time mask
        mt: number of time masks
        """
        self.F = F
        self.mf = mf
        self.Tmin = Tmin
        self.Tmax = Tmax
        self.mt = mt

    def __call__(self, batch_features: np.ndarray) -> np.ndarray:
        """ Mask every sample of the batch; the input is left untouched. """
        return np.stack([self.mask_features(features) for features in batch_features], axis=0)

    def mask_features(self, features: np.ndarray) -> np.ndarray:
        """ Return a masked copy of one (time, channels) matrix. """
        features = features.copy()
        time, channels = features.shape
        means = features.mean(axis=0)  # The mean should be zero if features are normalized
        # Each kind of masking is applied only when both of its parameters are set.
        if self.F and self.mf:
            features = self.mask_frequencies(features, means, channels, self.F, self.mf)
        if self.Tmax and self.mt:
            features = self.mask_time(features, means, time, (self.Tmin, self.Tmax), self.mt)
        return features

    @staticmethod
    def mask_frequencies(features: np.ndarray, means: np.ndarray, channels: int, F: int, mf: int):
        """ Overwrite `mf` random channel bands, each at most `F` wide, in place. """
        # A mask cannot be wider than the matrix; without the clamp the start
        # offset would be drawn from an empty range.
        F = min(F, channels)
        for _ in range(mf):
            # `randint` excludes its upper bound, hence `+ 1` to keep the
            # inclusive ranges of the deprecated `random_integers`.
            f = np.random.randint(0, F + 1)
            f0 = np.random.randint(0, channels - F + 1)
            features[:, f0:f0 + f] = means[f0:f0 + f]
        return features

    @staticmethod
    def mask_time(features: np.ndarray, means: np.ndarray, time: int, T_range: Tuple[int, int], mt: int):
        """ Overwrite `mt` random runs of time steps in place; each run is
        between `Tmin` and `Tmax` steps long (`T_range = (Tmin, Tmax)`). """
        Tmin, Tmax = T_range
        # Clamp to the available time steps so short inputs do not raise, and
        # treat an omitted lower bound as zero.
        Tmax = min(Tmax, time)
        Tmin = max(0, min(Tmin or 0, Tmax))
        for _ in range(mt):
            t = np.random.randint(Tmin, Tmax + 1)
            t0 = np.random.randint(0, time - Tmax + 1)
            features[t0:t0 + t, :] = means
        return features
def test_model(test_data=None):
    """ Print the reference transcript and the prediction of the first test sample.

    test_data: a DataFrame with `path` and `transcripts` columns, or the path
        of a CSV file holding them. When omitted, 'test_data.csv' is read.
        (Previously the argument was overwritten unconditionally.)
    """
    if test_data is None:
        test_data = 'test_data.csv'
    if isinstance(test_data, str):
        test_data = pd.read_csv(test_data)
    pipeline = asr.pipeline.load('checkpoints')
    print("Truth:", test_data['transcripts'].to_list()[0])
    print("Prediction", pipeline.predict(test_data['path'].to_list()[0]))
def get_metrics(sources: List[str],
                destinations: List[str]) -> Iterable[Metric]:
    """ Calculate base metrics in one batch: WER and CER.

    sources: predicted transcripts
    destinations: reference transcripts, paired with `sources` by position
    Yields one `Metric(transcript, prediction, wer, cer)` per pair.
    """
    for source, destination in zip(sources, destinations):
        yield Metric(destination, source,
                     get_wer(source, destination),
                     get_cer(source, destination))


def get_cer(source: str, destination: str) -> float:
    """ Character error rate: edit distance between the character sequences
    divided by the length of the reference `destination`. """
    cer_distance, *_ = distance.edit_distance(list(source),
                                              list(destination))
    # An empty reference would otherwise divide by zero (nan/inf in a
    # NumPy division) and poison any mean taken over the metrics.
    return cer_distance / max(len(destination), 1)


def get_wer(source: str, destination: str) -> float:
    """ Word error rate: edit distance between the whitespace-separated
    words divided by the number of words in the reference `destination`. """
    reference_words = destination.split()
    wer_distance, *_ = distance.edit_distance(source.split(),
                                              reference_words)
    # Same guard as in `get_cer` for a reference without words.
    return wer_distance / max(len(reference_words), 1)
def save_metrics_and_activations(pipeline: pipeline.Pipeline,
                                 data: pd.DataFrame,
                                 store_path: str,
                                 prepared_features: bool = False,
                                 return_metrics: bool = False,
                                 sample_rate: int = 16000,
                                 mono: bool = True
                                 ) -> Union[Tuple[float, float], pd.DataFrame]:
    """ Run every sample through the model, store all layer outputs in an
    HDF5 file and return the error rates.

    pipeline: object providing `model`, `features_extractor`, `decoder`
        and `alphabet`
    data: frame with the `path` and `transcripts` columns
    store_path: HDF5 file to create (an existing file is overwritten)
    prepared_features: treat the `path` values as ready feature arrays
    return_metrics: return the per-sample frame instead of the mean pair
    sample_rate, mono: forwarded to `utils.read_audio`, which requires them
    Returns the per-sample metrics frame, or `(mean WER, mean CER)`.
    """
    columns = ['sample_id', 'transcript', 'prediction', 'wer', 'cer']
    references = pd.DataFrame(columns=columns).set_index('sample_id')
    get_activations = get_activations_function(pipeline.model)
    all_metrics = []

    with h5py.File(store_path, mode='w') as store:
        for audio, transcript in zip(data['path'].values, data['transcripts'].values):
            if prepared_features:
                features = audio
            else:
                signal = utils.read_audio(audio, sample_rate, mono)
                features = pipeline.features_extractor([signal])
            *activations, y_hat = get_activations([features, 0])
            # The decoders in this package require the input length.
            # NOTE(review): assumes y_hat is (batch, time, classes) - confirm.
            decoded_labels = pipeline.decoder(y_hat, y_hat.shape[1])
            predictions = pipeline.alphabet.get_batch_transcripts(decoded_labels)
            # One reference per sample: wrap it in a list, otherwise the
            # string itself would be paired character by character.
            batch_metrics = list(evaluate.get_metrics(sources=predictions,
                                                      destinations=[transcript]))
            save_in_store(store, [*activations, y_hat], batch_metrics, references)
            all_metrics.extend(batch_metrics)

    with pd.HDFStore(store_path, mode='r+') as store:
        store.put('references', references)
    # Metrics of every sample (not only of the last one); the explicit columns
    # keep `wer`/`cer` available when `data` is empty.
    metrics = pd.DataFrame(all_metrics, columns=list(evaluate.Metric._fields))
    return metrics if return_metrics else (metrics.wer.mean(), metrics.cer.mean())
""" 44 | inputs = [model.input, tf.keras.learning_phase()] 45 | outputs = [layer.output for layer in model.layers][1:] 46 | return tf.keras.function(inputs, outputs) 47 | 48 | 49 | def save_in_store(store: h5py.File, 50 | layer_outputs: List[np.ndarray], 51 | metrics: List[evaluate.Metric], 52 | references: pd.DataFrame): 53 | """ Save batch data into HDF5 file. """ 54 | for index, metric in enumerate(metrics): 55 | sample_id = len(references) 56 | references.loc[sample_id] = metric 57 | for output_index, batch_layer_outputs in enumerate(layer_outputs): 58 | layer_output = batch_layer_outputs[index] 59 | store.create_dataset(f'outputs/{output_index}/{sample_id}', data=layer_output) 60 | -------------------------------------------------------------------------------- /deepasr/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import logging 4 | from functools import reduce 5 | from logging import Logger 6 | from typing import Any 7 | import numpy as np 8 | import librosa 9 | # from scipy.io import wavfile 10 | from tensorflow import keras 11 | 12 | # from google.cloud import storage 13 | 14 | logger = logging.getLogger('asr.utils') 15 | 16 | 17 | def load_data(file_path: str): 18 | """ Load arbitrary python objects from the pickled file. """ 19 | with open(file_path, mode='rb') as file: 20 | return pickle.load(file) 21 | 22 | 23 | def save_data(data: Any, file_path: str): 24 | """ Save arbitrary python objects in the pickled file. """ 25 | with open(file_path, mode='wb') as file: 26 | pickle.dump(data, file) 27 | 28 | 29 | # def download_from_bucket(bucket_name: str, remote_path: str, local_path: str): 30 | # """ Download the file from the public bucket. 
""" 31 | # client = storage.Client.create_anonymous_client() 32 | # bucket = client.bucket(bucket_name) 33 | # blob = storage.Blob(remote_path, bucket) 34 | # blob.download_to_filename(local_path, client=client) 35 | 36 | 37 | # def maybe_download_from_bucket(bucket_name: str, remote_path: str, local_path: str): 38 | # """ Download file from the bucket if it does not exist. """ 39 | # if os.path.isfile(local_path): 40 | # return 41 | # directory = os.path.dirname(local_path) 42 | # os.makedirs(directory, exist_ok=True) 43 | # logger.info('Downloading file from the bucket...') 44 | # download_from_bucket(bucket_name, remote_path, local_path) 45 | 46 | 47 | def read_audio(file_path: str, sample_rate: int, mono: bool) -> np.ndarray: 48 | """ Read already prepared features from the store. """ 49 | # fs, audio = wavfile.read(file_path) 50 | audio = librosa.core.load(file_path, sr=sample_rate, mono=mono)[0] 51 | return audio 52 | 53 | 54 | def calculate_units(model: keras.Model) -> int: 55 | """ Calculate number of the model parameters. """ 56 | units = 0 57 | for parameters in model.get_weights(): 58 | units += reduce(lambda x, y: x * y, parameters.shape) 59 | return units 60 | 61 | 62 | def create_logger(file_path=None, level=20, name='asr') -> Logger: 63 | """ Create the logger and handlers both console and file. 
""" 64 | logger = logging.getLogger(name) 65 | logger.setLevel(level) 66 | formatter = logging.Formatter( 67 | '%(asctime)s [%(levelname)-8s] [%(name)-20s] %(message)s', 68 | datefmt='%Y-%m-%d %H:%M:%S' 69 | ) 70 | console = logging.StreamHandler() 71 | console.setFormatter(formatter) 72 | logger.addHandler(console) # handle all messages from logger 73 | if file_path: 74 | file_handler = logging.FileHandler(file_path, mode='w') 75 | file_handler.setFormatter(formatter) 76 | logger.addHandler(file_handler) 77 | return logger 78 | -------------------------------------------------------------------------------- /deepasr/vocab/alphabet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import numpy as np 4 | 5 | 6 | class Alphabet: 7 | """ 8 | Read alphabet-pl.txt, which is the list of valid characters. Alphabet has 9 | two special characters: 10 | - space on the beginning 11 | - blank token default added as the last char 12 | 13 | This class is used to convert characters to labels and vice versa. 14 | """ 15 | 16 | def __init__(self, file_path: str = None, lang: str = None): 17 | self.size = 0 18 | self.blank_token = None 19 | self._label_to_str = [] 20 | self._str_to_label = {} 21 | if lang in ['en', 'pl']: 22 | directory = os.path.dirname(os.path.abspath(__file__)) 23 | file_path = os.path.join(directory, f'alphabet-{lang}.txt') 24 | elif not file_path: 25 | raise ValueError 26 | self.process_alphabet_file(file_path) 27 | 28 | def __contains__(self, char: str) -> bool: 29 | """ Check if char is in the Alphabet. """ 30 | return char in self._str_to_label 31 | 32 | def string_from_label(self, label: int) -> str: 33 | """ Convert label to string. """ 34 | return self._label_to_str[label] 35 | 36 | def label_from_string(self, string: str) -> int: 37 | """ Convert string to label. 
""" 38 | return self._str_to_label[string] 39 | 40 | def process_alphabet_file(self, file_path: str): 41 | """ Read alphabet-pl.txt file. """ 42 | with open(file_path) as file: 43 | for line in file: 44 | if line.startswith('#'): 45 | continue 46 | # Char can contain more than one letter 47 | char = line[:-1] # remove the line ending 48 | self._label_to_str.append(char) 49 | self._str_to_label[char] = self.size 50 | self.size += 1 51 | # Blank token is added on the end 52 | self.blank_token = self.size - 1 53 | 54 | def get_batch_labels(self, transcripts: List[str]) -> np.ndarray: 55 | """ Convert batch transcripts to labels """ 56 | batch_labels = [[self.label_from_string(c) for c in transcript.lower().strip() if c in self] 57 | for transcript in transcripts] 58 | max_len = max(map(len, batch_labels)) 59 | default_value = self.blank_token 60 | for labels in batch_labels: 61 | remainder = [default_value] * (max_len - len(labels)) 62 | labels.extend(remainder) 63 | return np.array(batch_labels) 64 | 65 | def get_batch_transcripts(self, sequences: List[np.ndarray]) -> List[str]: 66 | """ Convert label sequences to transcripts. 
The `-1` also means the 67 | blank tag """ 68 | return [''.join(self.string_from_label(char_label) 69 | for char_label in sequence 70 | if char_label not in (-1, self.blank_token)) 71 | for sequence in sequences] 72 | 73 | 74 | if __name__ == "__main__": 75 | al = Alphabet(lang='en') 76 | labels = al.get_batch_labels(["Hi how are you", "i am vey well, what about you"]) 77 | print(labels) 78 | -------------------------------------------------------------------------------- /deepasr/evaluate/distance.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Tuple, List 3 | from collections import defaultdict 4 | import numpy as np 5 | 6 | 7 | def edit_distance(source: List[str], 8 | destination: List[str]) -> Tuple[int, np.ndarray, np.ndarray]: 9 | """ 10 | Calculation of edit distance between two sequences. 11 | 12 | This is the Levenshtein distance with the substitution cost equals 1. 13 | It is the iterative method with the full matrix support. 14 | O(nm) time and space complexity. 
def simple_backtrace(backtrace: np.ndarray):
    """ Walk the backtrace matrix from its last cell to the origin and
    return the visited steps in forward order.

    Every step is `(row, column, operation, cost)`: the cell reached by the
    move plus the operation and cost read from the cell that was left. The
    final cell is recorded as a zero-cost 'sub'. When a cell allows several
    operations, insertion is preferred over substitution over deletion.
    Raises KeyError for a cell that allows no operation.
    """
    n_rows, n_columns = backtrace.shape
    row, column = n_rows - 1, n_columns - 1
    path = [(row, column, 'sub', 0)]
    while row or column:
        can_delete, can_substitute, can_insert, cost = backtrace[row, column]
        if can_insert:
            operation = 'ins'
            column -= 1
        elif can_substitute:
            operation = 'sub'
            row -= 1
            column -= 1
        elif can_delete:
            operation = 'del'
            row -= 1
        else:
            raise KeyError("Backtrace matrix wrong defined")
        path.append((row, column, operation, cost))
    return path[::-1]
""" 76 | to_delete, to_insert, to_substitute = [], [], defaultdict(list) 77 | for index, (i, j, operation, cost) in enumerate(best_path): 78 | if operation == 'del': 79 | item = source[i] 80 | to_delete.append(item) 81 | elif operation == 'sub' and cost: 82 | # without cost sub operation indicates correctness 83 | wrong_item, target_item = source[i], destination[j] 84 | to_substitute[target_item].append(wrong_item) 85 | elif operation == 'ins': 86 | item = destination[j] 87 | to_insert.append(item) 88 | return to_delete, to_insert, to_substitute 89 | -------------------------------------------------------------------------------- /deepasr/model/deepspeech2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow import keras 4 | from tensorflow.keras.models import Model 5 | from tensorflow.keras.layers import * 6 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 7 | # from tensorflow.keras.activations import relu 8 | 9 | 10 | # def clipped_relu(x): 11 | # return relu(x, max_value=20) 12 | 13 | 14 | def get_deepspeech2(input_dim=None, output_dim=29, 15 | is_mixed_precision=True, random_state=1) -> keras.Model: 16 | """ 17 | 18 | input_dim: int i wielokrotność 4 19 | output_dim: licba liter w słowniku 20 | 21 | """ 22 | if is_mixed_precision: 23 | policy = mixed_precision.Policy('float32') 24 | mixed_precision.set_policy(policy) 25 | 26 | np.random.seed(random_state) 27 | tf.random.set_seed(random_state) 28 | 29 | # the input 30 | input_data = Input(name='the_input', shape=(None, input_dim), dtype='float32') 31 | 32 | # Batch normalize 33 | bn1 = BatchNormalization(axis=-1, name='BN_1')(input_data) 34 | 35 | # 1D Convs 36 | conv1 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_1')(bn1) 37 | cbn1 = BatchNormalization(axis=-1, name='CBN_1')(conv1) 38 | conv2 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_2')(cbn1) 39 | 
cbn2 = BatchNormalization(axis=-1, name='CBN_2')(conv2) 40 | conv3 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_3')(cbn2) 41 | 42 | # Batch normalize 43 | x = BatchNormalization(axis=-1, name='BN_2')(conv3) 44 | 45 | # BiRNNs 46 | # birnn1 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_1'), merge_mode='sum')(bn2) 47 | # birnn2 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_2'), merge_mode='sum')(birnn1) 48 | # birnn3 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_3'), merge_mode='sum')(birnn2) 49 | # birnn4 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_4'), merge_mode='sum')(birnn3) 50 | # birnn5 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_5'), merge_mode='sum')(birnn4) 51 | # birnn6 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_6'), merge_mode='sum')(birnn5) 52 | # birnn7 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_7'), merge_mode='sum')(birnn6) 53 | 54 | # BiRNNs 55 | for i in [1, 2, 3, 4, 5]: 56 | recurrent = GRU(units=800, 57 | activation='tanh', 58 | recurrent_activation='sigmoid', 59 | use_bias=True, 60 | return_sequences=True, 61 | reset_after=True, 62 | name=f'gru_{i}') 63 | x = Bidirectional(recurrent, 64 | name=f'bidirectional_{i}', 65 | merge_mode='concat')(x) 66 | x = Dropout(rate=0.5)(x) if i < 5 else x # Only between 67 | 68 | # Batch normalize 69 | bn3 = BatchNormalization(axis=-1, name='BN_3')(x) 70 | 71 | dense = TimeDistributed(Dense(1024, activation='relu', name='FC1'))(bn3) 72 | y_pred = TimeDistributed(Dense(output_dim, activation='softmax', name='y_pred'), name='the_output')(dense) 73 | 74 | model = Model(inputs=input_data, outputs=y_pred) 75 | 76 | # # your ground truth data. 
The data you are going to compare with the model's outputs in training 77 | # labels = Input(name='the_labels', shape=[label_dim], dtype='float32') 78 | # # the length (in steps, or chars this case) of each sample (sentence) in the y_pred tensor 79 | # input_length = Input(name='input_length', shape=[1], dtype='float32') 80 | # # the length (in steps, or chars this case) of each sample (sentence) in the y_true 81 | # label_length = Input(name='label_length', shape=[1], dtype='float32') 82 | # output = Lambda(ctc_loss, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) 83 | # model = Model(inputs=[input_data, labels, input_length, label_length], outputs=output, name="deepspeech2pro_v1") 84 | return model 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepAsr 2 | DeepAsr is an open-source & Keras (Tensorflow) implementation of end-to-end Automatic Speech Recognition (ASR) engine and it supports multiple Speech Recognition architectures. 3 | 4 | Supported Asr Architectures: 5 | - Baidu's Deep Speech 2 6 | - DeepAsrNetwork1 7 | 8 | **Using DeepAsr you can**: 9 | - perform speech-to-text using pre-trained models 10 | - tune pre-trained models to your needs 11 | - create new models on your own 12 | 13 | **DeepAsr key features**: 14 | - **Multi GPU support**: You can do much more like distribute the training using the [Strategy](https://www.tensorflow.org/guide/distributed_training), or experiment with [mixed precision](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy) policy. 15 | - **CuDNN support**: Model using [CuDNNLSTM](https://keras.io/layers/recurrent/) implementation by NVIDIA Developers. CPU devices is also supported. 16 | - **DataGenerator**: The feature extraction during model training for large the data. 
17 | 18 | ## Installation 19 | You can use pip: 20 | ```bash 21 | pip install deepasr 22 | ``` 23 | 24 | ## Getting started 25 | Speech recognition is a tough task. You don't need to know all the details to use one of the pretrained models. 26 | However, it is worth understanding the conceptually crucial components: 27 | - **Input**: Audio files (WAV or FLAC) with mono 16-bit 16 kHz (up to 5 seconds) 28 | - **FeaturesExtractor**: Convert audio files using MFCC Features or Spectrogram 29 | - **Model**: CTC model defined in [**Keras**](https://keras.io/) (references: [[1]](https://arxiv.org/abs/1412.5567), [[2]](https://arxiv.org/abs/1512.02595)) 30 | - **Decoder**: Greedy or BeamSearch algorithms (with language model support) decode a sequence of probabilities using the Alphabet 31 | - **DataGenerator**: Stream data to the model via generator 32 | - **Callbacks**: Set of functions monitoring the training 33 | 34 | ```python 35 | import numpy as np 36 | import pandas as pd 37 | import tensorflow as tf 38 | import deepasr as asr 39 | 40 | # get CTCPipeline 41 | def get_config(feature_type: str = 'spectrogram', multi_gpu: bool = False): 42 | # audio feature extractor 43 | features_extractor = asr.features.preprocess(feature_type=feature_type, features_num=161, 44 | samplerate=16000, 45 | winlen=0.02, 46 | winstep=0.025, 47 | winfunc=np.hanning) 48 | 49 | # input label encoder 50 | alphabet_en = asr.vocab.Alphabet(lang='en') 51 | # training model 52 | model = asr.model.get_deepspeech2( 53 | input_dim=161, 54 | output_dim=29, 55 | is_mixed_precision=True 56 | ) 57 | # model optimizer 58 | optimizer = tf.keras.optimizers.Adam( 59 | lr=1e-4, 60 | beta_1=0.9, 61 | beta_2=0.999, 62 | epsilon=1e-8 63 | ) 64 | # output label decoder 65 | decoder = asr.decoder.GreedyDecoder() 66 | # decoder = asr.decoder.BeamSearchDecoder(beam_width=100, top_paths=1) 67 | # CTCPipeline 68 | pipeline = asr.pipeline.ctc_pipeline.CTCPipeline( 69 | alphabet=alphabet_en, 
features_extractor=features_extractor, model=model, optimizer=optimizer, decoder=decoder, 70 | sample_rate=16000, mono=True, multi_gpu=multi_gpu 71 | ) 72 | return pipeline 73 | 74 | 75 | train_data = pd.read_csv('train_data.csv') 76 | 77 | pipeline = get_config(feature_type = 'fbank', multi_gpu=False) 78 | 79 | # train asr model 80 | history = pipeline.fit(train_dataset=train_data, batch_size=128, epochs=500) 81 | # history = pipeline.fit_generator(train_dataset = train_data, batch_size=32, epochs=500) 82 | 83 | pipeline.save('./checkpoint') 84 | ``` 85 | 86 | A loaded pre-trained model has all of its components. A prediction can be invoked just by calling `pipeline.predict()`. 87 | 88 | ```python 89 | import pandas as pd 90 | import deepasr as asr 91 | import numpy as np 92 | test_data = pd.read_csv('test_data.csv') 93 | 94 | # get testing audio and transcript from dataset 95 | index = np.random.randint(test_data.shape[0]) 96 | data = test_data.iloc[index] 97 | test_file = data[0] 98 | test_transcript = data[1] 99 | # Test Audio file 100 | print("Audio File:",test_file) 101 | # Test Transcript 102 | print("Audio Transcript:", test_transcript) 103 | print("Transcript length:",len(test_transcript)) 104 | 105 | pipeline = asr.pipeline.load('./checkpoint') 106 | print("Prediction", pipeline.predict(test_file)) 107 | ``` 108 | 109 | #### References 110 | 111 | The fundamental repositories: 112 | - Baidu - [DeepSpeech2 - A PaddlePaddle implementation of DeepSpeech2 architecture for ASR](https://github.com/PaddlePaddle/DeepSpeech) 113 | - NVIDIA - [Toolkit for efficient experimentation with Speech Recognition, Text2Speech and NLP](https://nvidia.github.io/OpenSeq2Seq) 114 | - TensorFlow - [The implementation of DeepSpeech2 model](https://github.com/tensorflow/models/tree/master/research/deep_speech) 115 | - Mozilla - [DeepSpeech - A TensorFlow implementation of Baidu's DeepSpeech architecture](https://github.com/mozilla/DeepSpeech) 116 | - Espnet - [End-to-End Speech Processing 
def round_half_up(number):
    """ Round `number` to the nearest integer, ties going away from zero. """
    return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP))


def rolling_window(a, window, step=1):
    """ Return every `step`-th length-`window` slice along the last axis of
    `a` as a strided view (no data is copied). """
    # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step]


def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True):
    """Frame a signal into overlapping frames.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame measured in samples.
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
    :param winfunc: the analysis window to apply to each frame. By default, or when None is given, no window is applied.
    :param stride_trick: use stride trick to compute the rolling window and window multiplication faster
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    if winfunc is None:
        # Callers in this package pass None for "no window"; calling it
        # would raise, so fall back to the rectangular window.
        winfunc = numpy.ones
    slen = len(sig)
    frame_len = round_half_up(frame_len)
    frame_step = round_half_up(frame_step)
    if slen <= frame_len:
        numframes = 1
    else:
        numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step))

    # Zero-pad so that the last frame is complete.
    padlen = int((numframes - 1) * frame_step + frame_len)
    zeros = numpy.zeros((padlen - slen,))
    padsignal = numpy.concatenate((sig, zeros))
    if stride_trick:
        win = winfunc(frame_len)
        frames = rolling_window(padsignal, window=frame_len, step=frame_step)
    else:
        # Explicit index matrix: row k holds the sample indices of frame k.
        indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
            numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
        indices = numpy.array(indices, dtype=numpy.int32)
        frames = padsignal[indices]
        win = numpy.tile(winfunc(frame_len), (numframes, 1))

    return frames * win
65 | """ 66 | frame_len = round_half_up(frame_len) 67 | frame_step = round_half_up(frame_step) 68 | numframes = numpy.shape(frames)[0] 69 | assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len' 70 | 71 | indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( 72 | numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T 73 | indices = numpy.array(indices, dtype=numpy.int32) 74 | padlen = (numframes - 1) * frame_step + frame_len 75 | 76 | if siglen <= 0: siglen = padlen 77 | 78 | rec_signal = numpy.zeros((padlen,)) 79 | window_correction = numpy.zeros((padlen,)) 80 | win = winfunc(frame_len) 81 | 82 | for i in range(0, numframes): 83 | window_correction[indices[i, :]] = window_correction[ 84 | indices[i, :]] + win + 1e-15 # add a little bit so it is never zero 85 | rec_signal[indices[i, :]] = rec_signal[indices[i, :]] + frames[i, :] 86 | 87 | rec_signal = rec_signal / window_correction 88 | return rec_signal[0:siglen] 89 | 90 | 91 | def magspec(frames, NFFT): 92 | """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). 93 | 94 | :param frames: the array of frames. Each row is a frame. 95 | :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. 96 | :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame. 97 | """ 98 | if numpy.shape(frames)[1] > NFFT: 99 | logging.warn( 100 | 'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', 101 | numpy.shape(frames)[1], NFFT) 102 | complex_spec = numpy.fft.rfft(frames, NFFT) 103 | return numpy.absolute(complex_spec) 104 | 105 | 106 | def powspec(frames, NFFT): 107 | """Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). 108 | 109 | :param frames: the array of frames. 
Each row is a frame. 110 | :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. 111 | :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame. 112 | """ 113 | return 1.0 / NFFT * numpy.square(magspec(frames, NFFT)) 114 | 115 | 116 | def logpowspec(frames, NFFT, norm=1): 117 | """Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). 118 | 119 | :param frames: the array of frames. Each row is a frame. 120 | :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. 121 | :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0. 122 | :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame. 123 | """ 124 | ps = powspec(frames, NFFT) 125 | ps[ps <= 1e-30] = 1e-30 126 | lps = 10 * numpy.log10(ps) 127 | if norm: 128 | return lps - numpy.max(lps) 129 | else: 130 | return lps 131 | 132 | 133 | def preemphasis(signal, coeff=0.95): 134 | """perform preemphasis on the input signal. 135 | 136 | :param signal: The signal to filter. 137 | :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95. 138 | :returns: the filtered signal. 139 | """ 140 | return numpy.append(signal[0], signal[1:] - coeff * signal[:-1]) 141 | -------------------------------------------------------------------------------- /deepasr/features/mfcc.py: -------------------------------------------------------------------------------- 1 | # calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications 2 | # Author: James Lyons 2012 3 | from __future__ import division 4 | import numpy 5 | from . 
def calculate_nfft(samplerate, winlen):
    """Calculates the FFT size as a power of two greater than or equal to
    the number of samples in a single window length.

    Having an FFT less than the window length loses precision by dropping
    many of the samples; a longer FFT than the window allows zero-padding
    of the FFT buffer which is neutral in terms of frequency domain conversion.

    :param samplerate: The sample rate of the signal we are working with, in Hz.
    :param winlen: The length of the analysis window in seconds.
    :returns: the FFT size, a power of two.
    """
    window_length_samples = winlen * samplerate
    nfft = 1
    while nfft < window_length_samples:
        nfft *= 2
    return nfft


def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=None, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is None, which uses the calculate_nfft function to choose the smallest size that does not drop sample data.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    nfft = nfft or calculate_nfft(samplerate, winlen)
    feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
    feat = numpy.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        feat[:, 0] = numpy.log(energy)
    return feat


def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
          winfunc=lambda x: numpy.ones((x,))):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512. Pass None to size it with calculate_nfft, as mfcc does.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """
    nfft = nfft or calculate_nfft(samplerate, winlen)
    highfreq = highfreq or samplerate / 2
    signal = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(signal, winlen * samplerate, winstep * samplerate, winfunc)
    pspec = sigproc.powspec(frames, nfft)

    tiny = numpy.finfo(float).eps
    energy = numpy.sum(pspec, 1)  # this stores the total energy in each frame
    energy = numpy.where(energy == 0, tiny, energy)  # if energy is zero, we get problems with log

    fb = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    feat = numpy.dot(pspec, fb.T)  # compute the filterbank energies
    feat = numpy.where(feat == 0, tiny, feat)  # if feat is zero, we get problems with log

    return feat, energy


def logfbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
             nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
             winfunc=lambda x: numpy.ones((x,))):
    """Compute log Mel-filterbank energy features from an audio signal.

    All parameters have the same meaning as in :func:`fbank`, to which they
    are forwarded unchanged.

    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    feat, _ = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
    return numpy.log(feat)


def ssc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
        nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
        winfunc=lambda x: numpy.ones((x,))):
    """Compute Spectral Subband Centroid features from an audio signal.

    All parameters have the same meaning as in :func:`fbank`.

    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    nfft = nfft or calculate_nfft(samplerate, winlen)
    highfreq = highfreq or samplerate / 2
    signal = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(signal, winlen * samplerate, winstep * samplerate, winfunc)
    pspec = sigproc.powspec(frames, nfft)
    pspec = numpy.where(pspec == 0, numpy.finfo(float).eps, pspec)  # if things are all zeros we get problems

    fb = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    feat = numpy.dot(pspec, fb.T)  # compute the filterbank energies
    # One frequency weight per FFT bin; broadcasting applies it to every frame.
    freqs = numpy.linspace(1, samplerate / 2, numpy.size(pspec, 1))

    return numpy.dot(pspec * freqs, fb.T) / feat


def hz2mel(hz):
    """Convert a value in Hertz to Mels

    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
    """
    return 2595 * numpy.log10(1 + hz / 700.)


def mel2hz(mel):
    """Convert a value in Mels to Hertz

    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    """
    return 700 * (10 ** (mel / 2595.0) - 1)


def get_filterbanks(nfilt=20, nfft=512, samplerate=16000, lowfreq=0, highfreq=None):
    """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
    to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the sample rate of the signal we are working with, in Hz. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    :raises AssertionError: if highfreq is greater than samplerate/2.
    """
    highfreq = highfreq or samplerate / 2
    assert highfreq <= samplerate / 2, "highfreq is greater than samplerate/2"

    # compute points evenly spaced in mels
    mel_points = numpy.linspace(hz2mel(lowfreq), hz2mel(highfreq), nfilt + 2)
    # our points are in Hz, but we use fft bins, so we have to convert
    # from Hz to fft bin number
    bins = numpy.floor((nfft + 1) * mel2hz(mel_points) / samplerate)

    filters = numpy.zeros([nfilt, nfft // 2 + 1])
    for j in range(nfilt):
        left, centre, right = bins[j], bins[j + 1], bins[j + 2]
        # Rising edge of the triangle, then the falling edge.
        for i in range(int(left), int(centre)):
            filters[j, i] = (i - left) / (centre - left)
        for i in range(int(centre), int(right)):
            filters[j, i] = (right - i) / (right - centre)
    return filters


def lifter(cepstra, L=22):
    """Apply a cepstral lifter the the matrix of cepstra. This has the effect of increasing the
    magnitude of the high frequency DCT coeffs.

    :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    :returns: the liftered cepstra; the input itself is returned when L <= 0.
    """
    if L <= 0:
        # values of L <= 0, do nothing
        return cepstra
    n = numpy.arange(numpy.shape(cepstra)[1])
    lift = 1 + (L / 2.) * numpy.sin(numpy.pi * n / L)
    return lift * cepstra
class CTCPipeline(Pipeline):
    """
    The pipeline is responsible for connecting a neural network model with
    all non-differential transformations (features extraction or decoding),
    and dependencies. Components are independent.
    """

    def __init__(self,
                 alphabet: Alphabet,
                 features_extractor: FeaturesExtractor,
                 model: keras.Model,
                 optimizer: keras.optimizers.Optimizer,
                 decoder: Decoder,
                 sample_rate: int,
                 mono: bool,
                 label_len: int = 0,
                 multi_gpu: bool = True,
                 temp_model: keras.Model = None):
        """ Store the components and compile ``model`` with ``optimizer``.

        :param sample_rate: passed to ``read_audio`` when loading audio files.
        :param mono: passed to ``read_audio`` when loading audio files.
        :param label_len: label width; overwritten by the ``fit*`` methods.
        :param multi_gpu: try to compile the model under a distribution strategy.
        :param temp_model: model exposed by ``model`` and written by ``save``;
            defaults to the compiled model.
        """
        self._alphabet = alphabet
        self._optimizer = optimizer
        self._decoder = decoder
        self._features_extractor = features_extractor
        self.sample_rate = sample_rate
        self.mono = mono
        self.label_len = label_len
        self.multi_gpu = multi_gpu
        self._model = self._compile_model(model, optimizer, multi_gpu)
        self.temp_model = temp_model if temp_model else self._model

    @property
    def alphabet(self) -> Alphabet:
        return self._alphabet

    @property
    def features_extractor(self) -> FeaturesExtractor:
        return self._features_extractor

    @property
    def model(self) -> keras.Model:
        # Note: this exposes ``temp_model``, not the compiled training model.
        return self.temp_model

    @property
    def decoder(self) -> Decoder:
        return self._decoder

    def preprocess(self,
                   data: List[np.ndarray],
                   is_extracted: bool,
                   augmentation: Augmentation) -> np.ndarray:
        """ Preprocess batch data to format understandable to a model.

        :param data: one array per sample.
        :param is_extracted: if true ``data`` is only aligned, otherwise it is
            run through the features extractor.
        :param augmentation: optional callable applied to the features.
        """
        if is_extracted:  # then just align features
            features = FeaturesExtractor.align(data)
        else:
            features = self._features_extractor(data)
        if augmentation:
            features = augmentation(features)
        return features

    def fit_iter(self,
                 train_dataset: pd.DataFrame,
                 augmentation: Augmentation = None,
                 prepared_features: bool = False,
                 iter_num: int = 1000,
                 batch_size: int = 32,
                 epochs: int = 3,
                 checkpoint: str = None,
                 **kwargs) -> keras.callbacks.History:
        """ Train on ``iter_num`` randomly sampled batches.

        Every 100th iteration prints progress, trains verbosely and, when
        ``checkpoint`` is given, saves the pipeline to that directory.

        :param train_dataset: frame with ``path`` and ``transcripts`` columns.
        :returns: the history of the last ``fit`` call.
        """
        history = keras.callbacks.History()

        audios = train_dataset['path'].to_list()
        transcripts = train_dataset['transcripts'].to_list()
        labels = self._alphabet.get_batch_labels(transcripts)
        train_len_ = len(transcripts)
        self.label_len = labels.shape[1]

        self._model.summary()

        for step in range(iter_num):
            # NOTE(review): the last 25 rows are never sampled and the call
            # raises ValueError when fewer than batch_size rows remain; the
            # reason for the offset is not visible here - confirm it is intended.
            train_index = random.sample(range(train_len_ - 25), batch_size)

            train_inputs = self.wrap_preprocess([audios[k] for k in train_index],
                                                [labels[k] for k in train_index],
                                                [transcripts[k] for k in train_index],
                                                augmentation, prepared_features)
            # Dummy targets for the 'ctc' output, one zero per sample.
            outputs = {'ctc': np.zeros([batch_size])}

            report = step % 100 == 0
            if report:
                print("iter:", step)
                print("input features: ", train_inputs['the_input'].shape)
                print("input labels: ", train_inputs['the_labels'].shape)

            history = self._model.fit(train_inputs, outputs,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=1 if report else 0, **kwargs)

            if report and checkpoint:
                self.save(checkpoint)
                print("Pipeline Saved at", checkpoint)

        return history

    def fit(self,
            train_dataset: pd.DataFrame,
            augmentation: Augmentation = None,
            prepared_features: bool = False,
            batch_size: int = 32,
            epochs: int = 3,
            checkpoint: str = None,
            **kwargs) -> keras.callbacks.History:
        """ Preprocess the whole dataset in memory and train the model.

        :param train_dataset: frame with ``path`` and ``transcripts`` columns.
        :param checkpoint: accepted for signature parity with ``fit_iter``;
            currently not used by this method.
        """
        audios = train_dataset['path'].to_list()
        transcripts = train_dataset['transcripts'].to_list()
        labels = self._alphabet.get_batch_labels(transcripts)
        self.label_len = labels.shape[1]

        self._model.summary()

        print("Feature Extraction in progress...")
        train_inputs = self.wrap_preprocess(audios,
                                            list(labels),
                                            transcripts, augmentation, prepared_features)
        # Dummy targets for the 'ctc' output, one zero per sample.
        outputs = {'ctc': np.zeros([len(audios)])}
        print("Feature Extraction completed.")

        print("input features: ", train_inputs['the_input'].shape)
        print("input labels: ", train_inputs['the_labels'].shape)

        print("Model training initiated...")
        return self._model.fit(train_inputs, outputs,
                               batch_size=batch_size,
                               epochs=epochs,
                               verbose=1, **kwargs)

    def fit_generator(self, train_dataset: pd.DataFrame,
                      shuffle: bool = True,
                      augmentation: Augmentation = None,
                      prepared_features: bool = False,
                      batch_size: int = 32,
                      epochs: int = 3,
                      verbose: int = 1,
                      **kwargs) -> keras.callbacks.History:
        """ Train the model from batches produced by ``get_generator``.

        :param train_dataset: frame with ``path`` and ``transcripts`` columns.
        """
        audios = train_dataset['path'].to_list()
        transcripts = train_dataset['transcripts'].to_list()
        labels = self._alphabet.get_batch_labels(transcripts)
        train_len_ = len(transcripts)
        self.label_len = labels.shape[1]

        self._model.summary()

        train_gen = self.get_generator(audios, labels, transcripts,
                                       batch_size, shuffle, augmentation, prepared_features)

        return self._model.fit(train_gen, epochs=epochs,
                               steps_per_epoch=train_len_ // batch_size, verbose=verbose, **kwargs)

    def get_generator(self, audio_paths: List[str], texts: np.ndarray, transcripts: List[str], batch_size: int = 32,
                      shuffle: bool = True, augmentation: Augmentation = None,
                      prepared_features: bool = False):
        """ Data Generator

        Yields ``(inputs, {'ctc': zeros})`` batches forever. Each pass covers
        every sample once, including a final smaller batch; the next batch is
        preprocessed on a worker thread while the current one is consumed.

        :param shuffle: reorder the samples before every pass. The order is
            driven by a generator seeded with 123, so runs are reproducible.
        :raises ValueError: if ``audio_paths`` is empty.
        """
        num_samples = len(audio_paths)
        if num_samples == 0:
            raise ValueError("audio_paths is empty; nothing to generate")

        def submit(pool, indices):
            # Audio, labels and transcripts are selected with the same indices
            # so that the label lengths stay aligned with the labels.
            future = pool.submit(self.wrap_preprocess,
                                 [audio_paths[k] for k in indices],
                                 [texts[k] for k in indices],
                                 [transcripts[k] for k in indices],
                                 augmentation, prepared_features)
            return future, len(indices)

        def generator():
            rng = random.Random(123)
            order = list(range(num_samples))
            # Run a single I/O thread in parallel; the executor is shut down
            # when the generator is closed.
            with ThreadPoolExecutor(1) as pool:
                while True:
                    if shuffle:
                        rng.shuffle(order)
                    pending = None
                    for offset in range(0, num_samples, batch_size):
                        upcoming = submit(pool, order[offset: offset + batch_size])
                        if pending is not None:
                            future, size = pending
                            yield future.result(), {'ctc': np.zeros([size])}
                        pending = upcoming
                    # The last batch of the pass has no successor to trigger it.
                    future, size = pending
                    yield future.result(), {'ctc': np.zeros([size])}

        return generator()

    def wrap_preprocess(self, audios: List[str], the_labels: List[np.array], transcripts: List[str],
                        augmentation: Augmentation = None,
                        prepared_features: bool = False):
        """ Build training data

        :returns: dict with the keys ``the_input``, ``the_labels``,
            ``input_length`` and ``label_length``.
        """
        mid_features = [read_audio(audio, sample_rate=self.sample_rate, mono=self.mono) for audio in audios]
        the_input = self.preprocess(mid_features, prepared_features, augmentation)

        the_labels = np.array(the_labels)

        # length of each transcription, as a column vector
        label_lengths = np.array([len(trans) for trans in transcripts]).reshape(-1, 1)

        # Every sample reports the label width as its input length.
        input_lengths = np.full((the_labels.shape[0], 1), float(the_labels.shape[1]))

        return {
            'the_input': the_input,
            'the_labels': the_labels,
            'input_length': input_lengths,
            'label_length': label_lengths
        }

    def predict(self, audio: str, **kwargs) -> List[str]:
        """ Get ready features, and make a prediction.

        :param audio: path of the audio file to transcribe.
        :param kwargs: forwarded to the Keras ``predict`` call.
        """
        # get audio features
        features = self.features_extractor.make_features(
            read_audio(audio, sample_rate=self.sample_rate, mono=self.mono))
        in_features = self.features_extractor.align([features], self.features_extractor.features_shape)

        # Sub-model from the 'the_input' layer to the 'the_output' layer.
        pred_model = Model(inputs=self._model.get_layer('the_input').output,
                           outputs=self._model.get_layer('the_output').output)
        batch_logits = pred_model.predict(in_features, **kwargs)
        decoded_labels = self._decoder(batch_logits, self.label_len)
        return self._alphabet.get_batch_transcripts(decoded_labels)

    def save(self, directory: str):
        """ Save each component of the CTC pipeline.

        :param directory: target directory; created if it does not exist.
        """
        os.makedirs(directory, exist_ok=True)

        self.temp_model.save(os.path.join(directory, 'network.h5'))
        self._model.save_weights(os.path.join(directory, 'model_weights.h5'))

        components = (
            (self._optimizer, 'optimizer.bin'),
            (self._alphabet, 'alphabet.bin'),
            (self._decoder, 'decoder.bin'),
            (self.multi_gpu, 'multi_gpu_flag.bin'),
            (self.sample_rate, 'sample_rate.bin'),
            (self.mono, 'mono.bin'),
            (self.label_len, 'label_len.bin'),
            (self._features_extractor, 'feature_extractor.bin'),
        )
        for component, file_name in components:
            save_data(component, os.path.join(directory, file_name))

    @staticmethod
    def _compile_model(model: keras.Model,
                       optimizer: keras.optimizers.Optimizer,
                       multi_gpu: bool) -> keras.Model:
        """ Compile the model, under a distribution strategy if requested.

        Falls back to a plain compile when the strategy raises ValueError.
        """
        if not multi_gpu:
            dist_model = compile_model(model, optimizer)
            logger.info("Training using single GPU or CPU")
            return dist_model

        try:
            strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
            with strategy.scope():
                dist_model = compile_model(model, optimizer)
            logger.info("Training using multiple GPUs")
        except ValueError:
            dist_model = compile_model(model, optimizer)
            logger.info("Training using single GPU or CPU")
        return dist_model
26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 
64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 
101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. 
For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 
197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. 
Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. 
If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 
297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 
324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 
348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. 
If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 
416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 631 | 632 | DeepAsr is an open-source implementation of 633 | end-to-end Automatic Speech Recognition (ASR) engine. 
634 | Copyright (C) 2020 Sai Kumar Yava 635 | 636 | This program is free software: you can redistribute it and/or modify 637 | it under the terms of the GNU Affero General Public License as published 638 | by the Free Software Foundation, either version 3 of the License, or 639 | (at your option) any later version. 640 | 641 | This program is distributed in the hope that it will be useful, 642 | but WITHOUT ANY WARRANTY; without even the implied warranty of 643 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 644 | GNU Affero General Public License for more details. 645 | 646 | You should have received a copy of the GNU Affero General Public License 647 | along with this program. If not, see <https://www.gnu.org/licenses/>. 648 | 649 | Also add information on how to contact you by electronic and paper mail. 650 | 651 | If your software can interact with users remotely through a computer 652 | network, you should also make sure that it provides a way for users to 653 | get its source. For example, if your program is a web application, its 654 | interface could display a "Source" link that leads users to an archive 655 | of the code. There are many ways you could offer source, and different 656 | solutions will be better for different programs; see section 13 for the 657 | specific requirements. 658 | 659 | You should also get your employer (if you work as a programmer) or school, 660 | if any, to sign a "copyright disclaimer" for the program, if necessary. 661 | For more information on this, and how to apply and follow the GNU AGPL, see 662 | <https://www.gnu.org/licenses/>.
663 | -------------------------------------------------------------------------------- /DeepAsr_CTC_Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# DeepAsr (DeepAsrNetwork1)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "colab": {}, 15 | "colab_type": "code", 16 | "id": "S0FiiN9Y0FEs" 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# !wget http://www.openslr.org/resources/12/train-clean-100.tar.gz" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "colab": {}, 28 | "colab_type": "code", 29 | "id": "5JJMHx460FE4" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# !tar xzvf train-clean-100.tar.gz" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "colab": {}, 41 | "colab_type": "code", 42 | "id": "SHLb6nDsUwkN" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# ! pip install tensorflow==2.1.0" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "colab_type": "text", 53 | "id": "Vx9UdVs5384B" 54 | }, 55 | "source": [ 56 | "# 1. 
Prepare DataSet" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "colab": {}, 64 | "colab_type": "code", 65 | "id": "kw_18d180FFM" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "import os\n", 70 | "import numpy as np\n", 71 | "import pandas as pd\n", 72 | "import tensorflow as tf\n", 73 | "import deepasr as asr\n", 74 | "import librosa" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "colab": { 82 | "base_uri": "https://localhost:8080/", 83 | "height": 34 84 | }, 85 | "colab_type": "code", 86 | "id": "d29QeHTJVNOF", 87 | "outputId": "b79a2fa5-e783-4543-d5b1-125fb3a1bd92" 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "'2.1.0'" 94 | ] 95 | }, 96 | "execution_count": 5, 97 | "metadata": { 98 | "tags": [] 99 | }, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "tf.__version__" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "colab": { 112 | "base_uri": "https://localhost:8080/", 113 | "height": 34 114 | }, 115 | "colab_type": "code", 116 | "id": "zQEGr0HfC5OF", 117 | "outputId": "88d851cc-ec17-42d8-c8a2-26cfda9506cb" 118 | }, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "'0.0.9'" 124 | ] 125 | }, 126 | "execution_count": 6, 127 | "metadata": { 128 | "tags": [] 129 | }, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "asr.__version__" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "colab": {}, 142 | "colab_type": "code", 143 | "id": "NmOf6DzG0FFS" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "# get audios and transcripts\n", 148 | "org_path = './LibriSpeech/train-clean-100/'\n", 149 | "count = 0\n", 150 | "inp = []\n", 151 | "k=0\n", 152 | "audio_name = []\n", 153 | "audio_trans = []\n", 154 | "for dir1 in 
os.listdir(org_path):\n", 155 | " dir2_path = org_path+dir1+'/'\n", 156 | " #print(dir2_path)\n", 157 | " for dir2 in os.listdir(dir2_path):\n", 158 | " dir3_path = dir2_path+dir2+'/'\n", 159 | " \n", 160 | " for audio in os.listdir(dir3_path):\n", 161 | " if audio.endswith('.txt'):\n", 162 | " k+=1\n", 163 | " file_path = dir3_path + audio\n", 164 | " with open(file_path) as f:\n", 165 | " line = f.readlines()\n", 166 | " for lines in line:\n", 167 | " flac_path = dir3_path+lines.split()[0]+'.flac'\n", 168 | " \n", 169 | " audio_name.append(flac_path)\n", 170 | "\n", 171 | " # print(cmd)\n", 172 | " words2 = lines.split()[1:]\n", 173 | " words4=' '.join(words2)\n", 174 | " audio_trans.append(words4)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "colab": {}, 182 | "colab_type": "code", 183 | "id": "5E9POoGc0FFb" 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "# create dataset\n", 188 | "df = pd.DataFrame({\"path\":audio_name,\"transcripts\":audio_trans})" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": { 195 | "colab": { 196 | "base_uri": "https://localhost:8080/", 197 | "height": 34 198 | }, 199 | "colab_type": "code", 200 | "id": "TRuPCDxZrhJu", 201 | "outputId": "64b3dc3f-ec85-4fc4-cf4e-60204b6f719e" 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "(28539, 2)" 208 | ] 209 | }, 210 | "execution_count": 9, 211 | "metadata": { 212 | "tags": [] 213 | }, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "df.shape" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "colab": {}, 226 | "colab_type": "code", 227 | "id": "g4bePqQvri5Q" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "# filter transcripts with fewer than 100 characters\n", 232 | "train_data = df[df['transcripts'].str.len() < 100]\n", 233 | "# train_df = df.sample(n = 3000)
" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 11, 239 | "metadata": { 240 | "colab": { 241 | "base_uri": "https://localhost:8080/", 242 | "height": 34 243 | }, 244 | "colab_type": "code", 245 | "id": "fiM94FU3rkh7", 246 | "outputId": "9c60db2a-8b85-47e9-a294-5e46d7c2c41e" 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "(3194, 2)" 253 | ] 254 | }, 255 | "execution_count": 11, 256 | "metadata": { 257 | "tags": [] 258 | }, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "train_data.shape" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "colab_type": "text", 270 | "id": "EMDC5MYk4AyL" 271 | }, 272 | "source": [ 273 | "# 2. Prepare DeepAsr CTC Pipeline" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "colab": {}, 281 | "colab_type": "code", 282 | "id": "-youl7Mo0FFi" 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "# get CTCPipeline\n", 287 | "def get_config(feature_type: str = 'spectrogram', multi_gpu: bool = False):\n", 288 | " # audio feature extractor\n", 289 | " features_extractor = asr.features.preprocess(feature_type=feature_type, features_num=161,\n", 290 | " samplerate=16000,\n", 291 | " winlen=0.02,\n", 292 | " winstep=0.025,\n", 293 | " winfunc=np.hanning)\n", 294 | " \n", 295 | " # input label encoder\n", 296 | " alphabet_en = asr.vocab.Alphabet(lang='en')\n", 297 | " # training model\n", 298 | " model = asr.model.get_deepasrnetwork1(\n", 299 | " input_dim=161,\n", 300 | " output_dim=29,\n", 301 | " is_mixed_precision=True\n", 302 | " )\n", 303 | " # model optimizer\n", 304 | " optimizer = tf.keras.optimizers.Adam(\n", 305 | " lr=1e-4,\n", 306 | " beta_1=0.9,\n", 307 | " beta_2=0.999,\n", 308 | " epsilon=1e-8\n", 309 | " )\n", 310 | " # output label decoder\n", 311 | " decoder = asr.decoder.GreedyDecoder()\n", 312 | " # CTCPipeline\n", 313 | " pipeline =
asr.pipeline.ctc_pipeline.CTCPipeline(\n", 314 | " alphabet=alphabet_en, features_extractor=features_extractor, model=model, optimizer=optimizer, decoder=decoder,\n", 315 | " sample_rate=16000, mono=True, multi_gpu=multi_gpu\n", 316 | " )\n", 317 | " return pipeline" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "colab": {}, 325 | "colab_type": "code", 326 | "id": "MqdfySzuRtk5" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "# CTCPipeline for asr\n", 331 | "pipeline = get_config(feature_type = 'fbank', multi_gpu=False)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": { 337 | "colab_type": "text", 338 | "id": "WTG8iEwS4NKU" 339 | }, 340 | "source": [ 341 | "# 3. Model training" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 21, 347 | "metadata": { 348 | "colab": { 349 | "base_uri": "https://localhost:8080/", 350 | "height": 1000 351 | }, 352 | "colab_type": "code", 353 | "id": "6QMxCI8T0qMK", 354 | "outputId": "a598acfb-1cce-41db-db3b-b2d5cdc062d9" 355 | }, 356 | "outputs": [ 357 | { 358 | "name": "stdout", 359 | "output_type": "stream", 360 | "text": [ 361 | "Model: \"DeepAsr\"\n", 362 | "__________________________________________________________________________________________________\n", 363 | "Layer (type) Output Shape Param # Connected to \n", 364 | "==================================================================================================\n", 365 | "the_input (InputLayer) [(None, None, 161)] 0 \n", 366 | "__________________________________________________________________________________________________\n", 367 | "BN_1 (BatchNormalization) (None, None, 161) 644 the_input[0][0] \n", 368 | "__________________________________________________________________________________________________\n", 369 | "Conv1D_1 (Conv1D) (None, None, 220) 177320 BN_1[0][0] \n", 370 |
"__________________________________________________________________________________________________\n", 371 | "CNBN_1 (BatchNormalization) (None, None, 220) 880 Conv1D_1[0][0] \n", 372 | "__________________________________________________________________________________________________\n", 373 | "Conv1D_2 (Conv1D) (None, None, 220) 242220 CNBN_1[0][0] \n", 374 | "__________________________________________________________________________________________________\n", 375 | "CNBN_2 (BatchNormalization) (None, None, 220) 880 Conv1D_2[0][0] \n", 376 | "__________________________________________________________________________________________________\n", 377 | "gru_1 (GRU) (None, None, 512) 1127424 CNBN_2[0][0] \n", 378 | "__________________________________________________________________________________________________\n", 379 | "gru_2 (GRU) (None, None, 512) 1127424 CNBN_2[0][0] \n", 380 | "__________________________________________________________________________________________________\n", 381 | "concatenate (Concatenate) (None, None, 1024) 0 gru_1[0][0] \n", 382 | " gru_2[0][0] \n", 383 | "__________________________________________________________________________________________________\n", 384 | "BN_2 (BatchNormalization) (None, None, 1024) 4096 concatenate[0][0] \n", 385 | "__________________________________________________________________________________________________\n", 386 | "time_distributed (TimeDistribut (None, None, 30) 30750 BN_2[0][0] \n", 387 | "__________________________________________________________________________________________________\n", 388 | "the_output (TimeDistributed) (None, None, 29) 899 time_distributed[0][0] \n", 389 | "__________________________________________________________________________________________________\n", 390 | "the_labels (InputLayer) [(None, None)] 0 \n", 391 | "__________________________________________________________________________________________________\n", 392 | "input_length (InputLayer) [(None, 1)] 0 \n", 393 
| "__________________________________________________________________________________________________\n", 394 | "label_length (InputLayer) [(None, 1)] 0 \n", 395 | "__________________________________________________________________________________________________\n", 396 | "ctc (Lambda) (None, 1) 0 the_output[0][0] \n", 397 | " the_labels[0][0] \n", 398 | " input_length[0][0] \n", 399 | " label_length[0][0] \n", 400 | "==================================================================================================\n", 401 | "Total params: 2,712,537\n", 402 | "Trainable params: 2,709,287\n", 403 | "Non-trainable params: 3,250\n", 404 | "__________________________________________________________________________________________________\n", 405 | "Feature Extraction in progress...\n", 406 | "Feature Extraction completed.\n", 407 | "input features: (3194, 593, 161)\n", 408 | "input labels: (3194, 99)\n", 409 | "Model training initiated...\n", 410 | "Train on 3194 samples\n", 411 | "Epoch 1/500\n", 412 | "3194/3194 [==============================] - 48s 15ms/sample - loss: inf - accuracy: 0.0000e+00\n", 413 | "Epoch 2/500\n", 414 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 415 | "Epoch 3/500\n", 416 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 417 | "Epoch 4/500\n", 418 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 419 | "Epoch 5/500\n", 420 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 421 | "Epoch 6/500\n", 422 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 423 | "Epoch 7/500\n", 424 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 425 | "Epoch 8/500\n", 426 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf 
- accuracy: 0.0000e+00\n", 427 | "Epoch 9/500\n", 428 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 429 | "Epoch 10/500\n", 430 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 431 | "Epoch 11/500\n", 432 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 433 | "Epoch 12/500\n", 434 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 435 | "Epoch 13/500\n", 436 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 437 | "Epoch 14/500\n", 438 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 439 | "Epoch 15/500\n", 440 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 441 | "Epoch 16/500\n", 442 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 443 | "Epoch 17/500\n", 444 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 445 | "Epoch 18/500\n", 446 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 447 | "Epoch 19/500\n", 448 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 449 | "Epoch 20/500\n", 450 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 451 | "Epoch 21/500\n", 452 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 453 | "Epoch 22/500\n", 454 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 455 | "Epoch 23/500\n", 456 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 457 | 
"Epoch 24/500\n", 458 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 459 | "Epoch 25/500\n", 460 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 461 | "Epoch 26/500\n", 462 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 463 | "Epoch 27/500\n", 464 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 465 | "Epoch 28/500\n", 466 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 467 | "Epoch 29/500\n", 468 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 469 | "Epoch 30/500\n", 470 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 471 | "Epoch 31/500\n", 472 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 473 | "Epoch 32/500\n", 474 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 475 | "Epoch 33/500\n", 476 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 477 | "Epoch 34/500\n", 478 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 479 | "Epoch 35/500\n", 480 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 481 | "Epoch 36/500\n", 482 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 483 | "Epoch 37/500\n", 484 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 485 | "Epoch 38/500\n", 486 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 487 | "Epoch 39/500\n", 488 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 489 | "Epoch 40/500\n", 490 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 491 | "Epoch 41/500\n", 492 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 493 | "Epoch 42/500\n", 494 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 495 | "Epoch 43/500\n", 496 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 497 | "Epoch 44/500\n", 498 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 499 | "Epoch 45/500\n", 500 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 501 | "Epoch 46/500\n", 502 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 503 | "Epoch 47/500\n", 504 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 505 | "Epoch 48/500\n", 506 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 507 | "Epoch 49/500\n", 508 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 509 | "Epoch 50/500\n", 510 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 511 | "Epoch 51/500\n", 512 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 513 | "Epoch 52/500\n", 514 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 515 | "Epoch 53/500\n", 516 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 517 | "Epoch 54/500\n", 518 | "3194/3194 [==============================] - 
39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 519 | "Epoch 55/500\n", 520 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 521 | "Epoch 56/500\n", 522 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 523 | "Epoch 57/500\n", 524 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 525 | "Epoch 58/500\n", 526 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 527 | "Epoch 59/500\n", 528 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 529 | "Epoch 60/500\n", 530 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 531 | "Epoch 61/500\n", 532 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 533 | "Epoch 62/500\n", 534 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 535 | "Epoch 63/500\n", 536 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 537 | "Epoch 64/500\n", 538 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 539 | "Epoch 65/500\n", 540 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 541 | "Epoch 66/500\n", 542 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 543 | "Epoch 67/500\n", 544 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 545 | "Epoch 68/500\n", 546 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 547 | "Epoch 69/500\n", 548 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - 
accuracy: 0.0000e+00\n", 549 | "Epoch 70/500\n", 550 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 551 | "Epoch 71/500\n", 552 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 553 | "Epoch 72/500\n", 554 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 555 | "Epoch 73/500\n", 556 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 557 | "Epoch 74/500\n", 558 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 559 | "Epoch 75/500\n", 560 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 561 | "Epoch 76/500\n", 562 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 563 | "Epoch 77/500\n", 564 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 565 | "Epoch 78/500\n", 566 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 567 | "Epoch 79/500\n", 568 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 569 | "Epoch 80/500\n", 570 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 571 | "Epoch 81/500\n", 572 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 573 | "Epoch 82/500\n", 574 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 575 | "Epoch 83/500\n", 576 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 577 | "Epoch 84/500\n", 578 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 579 | 
"Epoch 85/500\n", 580 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 581 | "Epoch 86/500\n", 582 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 583 | "Epoch 87/500\n", 584 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 585 | "Epoch 88/500\n", 586 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 587 | "Epoch 89/500\n", 588 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 589 | "Epoch 90/500\n", 590 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 591 | "Epoch 91/500\n", 592 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 593 | "Epoch 92/500\n", 594 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 595 | "Epoch 93/500\n", 596 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 597 | "Epoch 94/500\n", 598 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 599 | "Epoch 95/500\n", 600 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 601 | "Epoch 96/500\n", 602 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 603 | "Epoch 97/500\n", 604 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 605 | "Epoch 98/500\n", 606 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 607 | "Epoch 99/500\n", 608 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 609 | "Epoch 100/500\n", 610 | 
"3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 611 | "Epoch 101/500\n", 612 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 613 | "Epoch 102/500\n", 614 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 615 | "Epoch 103/500\n", 616 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 617 | "Epoch 104/500\n", 618 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 619 | "Epoch 105/500\n", 620 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 621 | "Epoch 106/500\n", 622 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 623 | "Epoch 107/500\n", 624 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 625 | "Epoch 108/500\n", 626 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 627 | "Epoch 109/500\n", 628 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 629 | "Epoch 110/500\n", 630 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 631 | "Epoch 111/500\n", 632 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 633 | "Epoch 112/500\n", 634 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 635 | "Epoch 113/500\n", 636 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 637 | "Epoch 114/500\n", 638 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 639 | "Epoch 115/500\n", 640 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 641 | "Epoch 116/500\n", 642 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 643 | "Epoch 117/500\n", 644 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 645 | "Epoch 118/500\n", 646 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 647 | "Epoch 119/500\n", 648 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 649 | "Epoch 120/500\n", 650 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 651 | "Epoch 121/500\n", 652 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 653 | "Epoch 122/500\n", 654 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 655 | "Epoch 123/500\n", 656 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 657 | "Epoch 124/500\n", 658 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 659 | "Epoch 125/500\n", 660 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 661 | "Epoch 126/500\n", 662 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 663 | "Epoch 127/500\n", 664 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 665 | "Epoch 128/500\n", 666 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 667 | "Epoch 129/500\n", 668 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 669 | "Epoch 130/500\n", 670 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 671 | "Epoch 131/500\n", 672 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 673 | "Epoch 132/500\n", 674 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 675 | "Epoch 133/500\n", 676 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 677 | "Epoch 134/500\n", 678 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 679 | "Epoch 135/500\n", 680 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 681 | "Epoch 136/500\n", 682 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 683 | "Epoch 137/500\n", 684 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 685 | "Epoch 138/500\n", 686 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 687 | "Epoch 139/500\n", 688 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 689 | "Epoch 140/500\n", 690 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 691 | "Epoch 141/500\n", 692 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 693 | "Epoch 142/500\n", 694 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 695 | "Epoch 143/500\n", 696 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 697 | "Epoch 144/500\n", 698 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 699 | "Epoch 145/500\n", 700 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 701 | "Epoch 146/500\n", 702 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 703 | "Epoch 147/500\n", 704 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 705 | "Epoch 148/500\n", 706 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 707 | "Epoch 149/500\n", 708 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 709 | "Epoch 150/500\n", 710 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 711 | "Epoch 151/500\n", 712 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 713 | "Epoch 152/500\n", 714 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 715 | "Epoch 153/500\n", 716 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 717 | "Epoch 154/500\n", 718 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 719 | "Epoch 155/500\n", 720 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 721 | "Epoch 156/500\n", 722 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 723 | "Epoch 157/500\n", 724 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 725 | "Epoch 158/500\n", 726 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 727 | "Epoch 159/500\n", 728 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 729 | "Epoch 160/500\n", 730 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 731 | "Epoch 161/500\n", 732 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 733 | "Epoch 162/500\n", 734 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 735 | "Epoch 163/500\n", 736 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 737 | "Epoch 164/500\n", 738 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 739 | "Epoch 165/500\n", 740 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 741 | "Epoch 166/500\n", 742 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 743 | "Epoch 167/500\n", 744 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 745 | "Epoch 168/500\n", 746 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 747 | "Epoch 169/500\n", 748 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 749 | "Epoch 170/500\n", 750 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 751 | "Epoch 171/500\n", 752 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 753 | "Epoch 172/500\n", 754 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 755 | "Epoch 173/500\n", 756 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 757 | "Epoch 174/500\n", 758 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 759 | "Epoch 175/500\n", 760 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 761 | "Epoch 176/500\n", 762 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 763 | "Epoch 177/500\n", 764 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 765 | "Epoch 178/500\n", 766 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 767 | "Epoch 179/500\n", 768 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 769 | "Epoch 180/500\n", 770 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 771 | "Epoch 181/500\n", 772 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 773 | "Epoch 182/500\n", 774 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 775 | "Epoch 183/500\n", 776 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 777 | "Epoch 184/500\n", 778 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 779 | "Epoch 185/500\n", 780 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 781 | "Epoch 186/500\n", 782 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 783 | "Epoch 187/500\n", 784 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 785 | "Epoch 188/500\n", 786 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 787 | "Epoch 189/500\n", 788 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 789 | "Epoch 190/500\n", 790 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 791 | "Epoch 191/500\n", 792 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 793 | "Epoch 192/500\n", 794 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 795 | "Epoch 193/500\n", 796 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 797 | "Epoch 194/500\n", 798 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 799 | "Epoch 195/500\n", 800 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 801 | "Epoch 196/500\n", 802 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 803 | "Epoch 197/500\n", 804 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 805 | "Epoch 198/500\n", 806 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 807 | "Epoch 199/500\n", 808 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 809 | "Epoch 200/500\n", 810 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 811 | "Epoch 201/500\n", 812 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 813 | "Epoch 202/500\n", 814 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 815 | "Epoch 203/500\n", 816 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 817 | "Epoch 204/500\n", 818 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 819 | "Epoch 205/500\n", 820 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 821 | "Epoch 206/500\n", 822 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 823 | "Epoch 207/500\n", 824 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 825 | "Epoch 208/500\n", 826 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 827 | "Epoch 209/500\n", 828 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 829 | "Epoch 210/500\n", 830 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 831 | "Epoch 211/500\n", 832 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 833 | "Epoch 212/500\n", 834 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 835 | "Epoch 213/500\n", 836 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 837 | "Epoch 214/500\n", 838 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 839 | "Epoch 215/500\n", 840 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 841 | "Epoch 216/500\n", 842 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 843 | "Epoch 217/500\n", 844 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 845 | "Epoch 218/500\n", 846 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 847 | "Epoch 219/500\n", 848 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 849 | "Epoch 220/500\n", 850 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 851 | "Epoch 221/500\n", 852 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 853 | "Epoch 222/500\n", 854 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 855 | "Epoch 223/500\n", 856 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 857 | "Epoch 224/500\n", 858 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 859 | "Epoch 225/500\n", 860 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 861 | "Epoch 226/500\n", 862 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 863 | "Epoch 227/500\n", 864 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 865 | "Epoch 228/500\n", 866 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 867 | "Epoch 229/500\n", 868 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 869 | "Epoch 230/500\n", 870 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 871 | "Epoch 231/500\n", 872 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 873 | "Epoch 232/500\n", 874 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 875 | "Epoch 233/500\n", 876 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 877 | "Epoch 234/500\n", 878 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 879 | "Epoch 235/500\n", 880 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 881 | "Epoch 236/500\n", 882 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 883 | "Epoch 237/500\n", 884 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 885 | "Epoch 238/500\n", 886 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 887 | "Epoch 239/500\n", 888 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 889 | "Epoch 240/500\n", 890 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 891 | "Epoch 241/500\n", 892 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 893 | "Epoch 242/500\n", 894 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 895 | "Epoch 243/500\n", 896 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 897 | "Epoch 244/500\n", 898 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 899 | "Epoch 245/500\n", 900 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 901 | "Epoch 246/500\n", 902 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 903 | "Epoch 247/500\n", 904 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 905 | "Epoch 248/500\n", 906 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 907 | "Epoch 249/500\n", 908 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 909 | "Epoch 250/500\n", 910 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 911 | "Epoch 251/500\n", 912 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 913 | "Epoch 252/500\n", 914 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 915 | "Epoch 253/500\n", 916 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 917 | "Epoch 254/500\n", 918 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 919 | "Epoch 255/500\n", 920 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 921 | "Epoch 256/500\n", 922 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 923 | "Epoch 257/500\n", 924 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 925 | "Epoch 258/500\n", 926 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 927 | "Epoch 259/500\n", 928 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 929 | "Epoch 260/500\n", 930 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 931 | "Epoch 261/500\n", 932 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 933 | "Epoch 262/500\n", 934 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 935 | "Epoch 263/500\n", 936 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 937 | "Epoch 264/500\n", 938 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 939 | "Epoch 265/500\n", 940 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 941 | "Epoch 266/500\n", 942 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 943 | "Epoch 267/500\n", 944 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 945 | "Epoch 268/500\n", 946 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 947 | "Epoch 269/500\n", 948 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 949 | "Epoch 270/500\n", 950 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 951 | "Epoch 271/500\n", 952 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 953 | "Epoch 272/500\n", 954 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 955 | "Epoch 273/500\n", 956 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 957 | "Epoch 274/500\n", 958 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 959 | "Epoch 275/500\n", 960 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 961 | "Epoch 276/500\n", 962 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 963 | "Epoch 277/500\n", 964 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 965 | "Epoch 278/500\n", 966 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 967 | "Epoch 279/500\n", 968 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 969 | "Epoch 280/500\n", 970 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 971 | "Epoch 281/500\n", 972 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 973 | "Epoch 282/500\n", 974 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 975 | "Epoch 283/500\n", 976 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 977 | "Epoch 284/500\n", 978 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 979 | "Epoch 285/500\n", 980 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 981 | "Epoch 286/500\n", 982 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 983 | "Epoch 287/500\n", 984 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 985 | "Epoch 288/500\n", 986 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 987 | "Epoch 289/500\n", 988 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 989 | "Epoch 290/500\n", 990 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 991 | "Epoch 291/500\n", 992 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 993 | "Epoch 292/500\n", 994 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 995 | "Epoch 293/500\n", 996 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 997 | "Epoch 294/500\n", 998 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 999 | "Epoch 295/500\n", 1000 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1001 | "Epoch 296/500\n", 1002 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1003 | "Epoch 297/500\n", 1004 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1005 | "Epoch 298/500\n", 1006 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1007 | "Epoch 299/500\n", 1008 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1009 | "Epoch 300/500\n", 1010 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1011 | "Epoch 301/500\n", 1012 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1013 | "Epoch 302/500\n", 1014 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1015 | "Epoch 303/500\n", 1016 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1017 | "Epoch 304/500\n", 1018 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1019 | "Epoch 305/500\n", 1020 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1021 | "Epoch 306/500\n", 1022 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1023 | "Epoch 307/500\n", 1024 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1025 | "Epoch 308/500\n", 1026 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1027 | "Epoch 309/500\n", 1028 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1029 | "Epoch 310/500\n", 1030 | 
"3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1031 | "Epoch 311/500\n", 1032 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1033 | "Epoch 312/500\n", 1034 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1035 | "Epoch 313/500\n", 1036 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1037 | "Epoch 314/500\n", 1038 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1039 | "Epoch 315/500\n", 1040 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1041 | "Epoch 316/500\n", 1042 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1043 | "Epoch 317/500\n", 1044 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1045 | "Epoch 318/500\n", 1046 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1047 | "Epoch 319/500\n", 1048 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1049 | "Epoch 320/500\n", 1050 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1051 | "Epoch 321/500\n", 1052 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1053 | "Epoch 322/500\n", 1054 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1055 | "Epoch 323/500\n", 1056 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1057 | "Epoch 324/500\n", 1058 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1059 | "Epoch 
325/500\n", 1060 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1061 | "Epoch 326/500\n", 1062 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1063 | "Epoch 327/500\n", 1064 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1065 | "Epoch 328/500\n", 1066 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1067 | "Epoch 329/500\n", 1068 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1069 | "Epoch 330/500\n", 1070 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1071 | "Epoch 331/500\n", 1072 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1073 | "Epoch 332/500\n", 1074 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1075 | "Epoch 333/500\n", 1076 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1077 | "Epoch 334/500\n", 1078 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1079 | "Epoch 335/500\n", 1080 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1081 | "Epoch 336/500\n", 1082 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1083 | "Epoch 337/500\n", 1084 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1085 | "Epoch 338/500\n", 1086 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1087 | "Epoch 339/500\n", 1088 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 
1089 | "Epoch 340/500\n", 1090 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1091 | "Epoch 341/500\n", 1092 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1093 | "Epoch 342/500\n", 1094 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1095 | "Epoch 343/500\n", 1096 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1097 | "Epoch 344/500\n", 1098 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1099 | "Epoch 345/500\n", 1100 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1101 | "Epoch 346/500\n", 1102 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1103 | "Epoch 347/500\n", 1104 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1105 | "Epoch 348/500\n", 1106 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1107 | "Epoch 349/500\n", 1108 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1109 | "Epoch 350/500\n", 1110 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1111 | "Epoch 351/500\n", 1112 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1113 | "Epoch 352/500\n", 1114 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1115 | "Epoch 353/500\n", 1116 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1117 | "Epoch 354/500\n", 1118 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 
0.0000e+00\n", 1119 | "Epoch 355/500\n", 1120 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1121 | "Epoch 356/500\n", 1122 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1123 | "Epoch 357/500\n", 1124 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1125 | "Epoch 358/500\n", 1126 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1127 | "Epoch 359/500\n", 1128 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1129 | "Epoch 360/500\n", 1130 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1131 | "Epoch 361/500\n", 1132 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1133 | "Epoch 362/500\n", 1134 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1135 | "Epoch 363/500\n", 1136 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1137 | "Epoch 364/500\n", 1138 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1139 | "Epoch 365/500\n", 1140 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1141 | "Epoch 366/500\n", 1142 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1143 | "Epoch 367/500\n", 1144 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1145 | "Epoch 368/500\n", 1146 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1147 | "Epoch 369/500\n", 1148 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - 
accuracy: 0.0000e+00\n", 1149 | "Epoch 370/500\n", 1150 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1151 | "Epoch 371/500\n", 1152 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1153 | "Epoch 372/500\n", 1154 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1155 | "Epoch 373/500\n", 1156 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1157 | "Epoch 374/500\n", 1158 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1159 | "Epoch 375/500\n", 1160 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1161 | "Epoch 376/500\n", 1162 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1163 | "Epoch 377/500\n", 1164 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1165 | "Epoch 378/500\n", 1166 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1167 | "Epoch 379/500\n", 1168 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1169 | "Epoch 380/500\n", 1170 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1171 | "Epoch 381/500\n", 1172 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1173 | "Epoch 382/500\n", 1174 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1175 | "Epoch 383/500\n", 1176 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1177 | "Epoch 384/500\n", 1178 | "3194/3194 [==============================] - 39s 12ms/sample - 
loss: inf - accuracy: 0.0000e+00\n", 1179 | "Epoch 385/500\n", 1180 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1181 | "Epoch 386/500\n", 1182 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1183 | "Epoch 387/500\n", 1184 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1185 | "Epoch 388/500\n", 1186 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1187 | "Epoch 389/500\n", 1188 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1189 | "Epoch 390/500\n", 1190 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1191 | "Epoch 391/500\n", 1192 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1193 | "Epoch 392/500\n", 1194 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1195 | "Epoch 393/500\n", 1196 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1197 | "Epoch 394/500\n", 1198 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1199 | "Epoch 395/500\n", 1200 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1201 | "Epoch 396/500\n", 1202 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1203 | "Epoch 397/500\n", 1204 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1205 | "Epoch 398/500\n", 1206 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1207 | "Epoch 399/500\n", 1208 | "3194/3194 [==============================] - 39s 
12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1209 | "Epoch 400/500\n", 1210 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1211 | "Epoch 401/500\n", 1212 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1213 | "Epoch 402/500\n", 1214 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1215 | "Epoch 403/500\n", 1216 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1217 | "Epoch 404/500\n", 1218 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1219 | "Epoch 405/500\n", 1220 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1221 | "Epoch 406/500\n", 1222 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1223 | "Epoch 407/500\n", 1224 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1225 | "Epoch 408/500\n", 1226 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1227 | "Epoch 409/500\n", 1228 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1229 | "Epoch 410/500\n", 1230 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1231 | "Epoch 411/500\n", 1232 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1233 | "Epoch 412/500\n", 1234 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1235 | "Epoch 413/500\n", 1236 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1237 | "Epoch 414/500\n", 1238 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1239 | "Epoch 415/500\n", 1240 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1241 | "Epoch 416/500\n", 1242 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1243 | "Epoch 417/500\n", 1244 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1245 | "Epoch 418/500\n", 1246 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1247 | "Epoch 419/500\n", 1248 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1249 | "Epoch 420/500\n", 1250 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1251 | "Epoch 421/500\n", 1252 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1253 | "Epoch 422/500\n", 1254 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1255 | "Epoch 423/500\n", 1256 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1257 | "Epoch 424/500\n", 1258 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1259 | "Epoch 425/500\n", 1260 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1261 | "Epoch 426/500\n", 1262 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1263 | "Epoch 427/500\n", 1264 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1265 | "Epoch 428/500\n", 1266 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1267 | "Epoch 429/500\n", 1268 | 
"3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1269 | "Epoch 430/500\n", 1270 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1271 | "Epoch 431/500\n", 1272 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n", 1273 | "Epoch 432/500\n", 1274 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1275 | "Epoch 433/500\n", 1276 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1277 | "Epoch 434/500\n", 1278 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1279 | "Epoch 435/500\n", 1280 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1281 | "Epoch 436/500\n", 1282 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1283 | "Epoch 437/500\n", 1284 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1285 | "Epoch 438/500\n", 1286 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1287 | "Epoch 439/500\n", 1288 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1289 | "Epoch 440/500\n", 1290 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1291 | "Epoch 441/500\n", 1292 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n", 1293 | "Epoch 442/500\n", 1294 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n", 1295 | "Epoch 443/500\n", 1296 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0016\n", 1297 | "Epoch 444/500\n", 
1298 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1299 | "Epoch 445/500\n", 1300 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1301 | "Epoch 446/500\n", 1302 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1303 | "Epoch 447/500\n", 1304 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1305 | "Epoch 448/500\n", 1306 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0016\n", 1307 | "Epoch 449/500\n", 1308 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1309 | "Epoch 450/500\n", 1310 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0019\n", 1311 | "Epoch 451/500\n", 1312 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1313 | "Epoch 452/500\n", 1314 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1315 | "Epoch 453/500\n", 1316 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0041\n", 1317 | "Epoch 454/500\n", 1318 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n", 1319 | "Epoch 455/500\n", 1320 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0034\n", 1321 | "Epoch 456/500\n", 1322 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0019\n", 1323 | "Epoch 457/500\n", 1324 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0025\n", 1325 | "Epoch 458/500\n", 1326 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1327 | "Epoch 459/500\n", 1328 | "3194/3194 
[==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n", 1329 | "Epoch 460/500\n", 1330 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0025\n", 1331 | "Epoch 461/500\n", 1332 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0022\n", 1333 | "Epoch 462/500\n", 1334 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0031\n", 1335 | "Epoch 463/500\n", 1336 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0044\n", 1337 | "Epoch 464/500\n", 1338 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n", 1339 | "Epoch 465/500\n", 1340 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0031\n", 1341 | "Epoch 466/500\n", 1342 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1343 | "Epoch 467/500\n", 1344 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1345 | "Epoch 468/500\n", 1346 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n", 1347 | "Epoch 469/500\n", 1348 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0047\n", 1349 | "Epoch 470/500\n", 1350 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0044\n", 1351 | "Epoch 471/500\n", 1352 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0085\n", 1353 | "Epoch 472/500\n", 1354 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0100\n", 1355 | "Epoch 473/500\n", 1356 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0163\n", 1357 | "Epoch 474/500\n", 1358 | "3194/3194 [==============================] - 39s 
12ms/sample - loss: inf - accuracy: 0.0157\n", 1359 | "Epoch 475/500\n", 1360 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0059\n", 1361 | "Epoch 476/500\n", 1362 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0053\n", 1363 | "Epoch 477/500\n", 1364 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0066\n", 1365 | "Epoch 478/500\n", 1366 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0078\n", 1367 | "Epoch 479/500\n", 1368 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0091\n", 1369 | "Epoch 480/500\n", 1370 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0138\n", 1371 | "Epoch 481/500\n", 1372 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0066\n", 1373 | "Epoch 482/500\n", 1374 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0081\n", 1375 | "Epoch 483/500\n", 1376 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0119\n", 1377 | "Epoch 484/500\n", 1378 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0185\n", 1379 | "Epoch 485/500\n", 1380 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0100\n", 1381 | "Epoch 486/500\n", 1382 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0138\n", 1383 | "Epoch 487/500\n", 1384 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0238\n", 1385 | "Epoch 488/500\n", 1386 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0122\n", 1387 | "Epoch 489/500\n", 1388 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0110\n", 
1389 | "Epoch 490/500\n", 1390 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0150\n", 1391 | "Epoch 491/500\n", 1392 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0041\n", 1393 | "Epoch 492/500\n", 1394 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n", 1395 | "Epoch 493/500\n", 1396 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0022\n", 1397 | "Epoch 494/500\n", 1398 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0050\n", 1399 | "Epoch 495/500\n", 1400 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0053\n", 1401 | "Epoch 496/500\n", 1402 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n", 1403 | "Epoch 497/500\n", 1404 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0047\n", 1405 | "Epoch 498/500\n", 1406 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0128\n", 1407 | "Epoch 499/500\n", 1408 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0185\n", 1409 | "Epoch 500/500\n", 1410 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0222\n" 1411 | ] 1412 | } 1413 | ], 1414 | "source": [ 1415 | "# train asr model\n", 1416 | "history = pipeline.fit(train_dataset = train_data, batch_size=128, epochs=500)\n", 1417 | "\n", 1418 | "# history = pipeline.fit_iter(train_dataset = train_data, batch_size=32, epochs=3,iter_num=500,checkpoint=project_path+'checkpoints')\n", 1419 | "# history = pipeline.fit_generator(train_dataset = train_data, batch_size=32, epochs=500)" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": null, 1425 | "metadata": { 1426 | "colab": {}, 1427 | 
"colab_type": "code", 1428 | "id": "5WbeF-OWwhZB" 1429 | }, 1430 | "outputs": [], 1431 | "source": [ 1432 | "# save deepasr ctc pipeline\n", 1433 | "pipeline.save(project_path+'checkpoints')" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "markdown", 1438 | "metadata": { 1439 | "colab_type": "text", 1440 | "id": "o_psolNH4XFl" 1441 | }, 1442 | "source": [ 1443 | "# 4. Model testing" 1444 | ] 1445 | }, 1446 | { 1447 | "cell_type": "code", 1448 | "execution_count": 12, 1449 | "metadata": { 1450 | "colab": { 1451 | "base_uri": "https://localhost:8080/", 1452 | "height": 34 1453 | }, 1454 | "colab_type": "code", 1455 | "id": "EEgiUEkVc07E", 1456 | "outputId": "fb6184a0-c0d9-4fe9-f445-37477d4661ff" 1457 | }, 1458 | "outputs": [ 1459 | { 1460 | "name": "stdout", 1461 | "output_type": "stream", 1462 | "text": [ 1463 | "WARNING:tensorflow:No training configuration found in save file: the model was *not* compiled. Compile it manually.\n" 1464 | ] 1465 | } 1466 | ], 1467 | "source": [ 1468 | "# load saved ctc pipeline\n", 1469 | "pipeline1 = asr.pipeline.load(project_path+'checkpoints')" 1470 | ] 1471 | }, 1472 | { 1473 | "cell_type": "code", 1474 | "execution_count": 13, 1475 | "metadata": { 1476 | "colab": { 1477 | "base_uri": "https://localhost:8080/", 1478 | "height": 67 1479 | }, 1480 | "colab_type": "code", 1481 | "id": "xkPo_3SMtzHp", 1482 | "outputId": "bcdc7fa7-6852-4aa8-99e3-6db3d8508ada" 1483 | }, 1484 | "outputs": [ 1485 | { 1486 | "name": "stdout", 1487 | "output_type": "stream", 1488 | "text": [ 1489 | "Audio File: ./LibriSpeech/train-clean-100/27/124992/27-124992-0063.flac\n", 1490 | "Audio Transcription: WENT THROUGH THE PLAINS BUT WHEN THEY CAME NEAR THE MOUNTAINS\n", 1491 | "Trancript length: 61\n" 1492 | ] 1493 | } 1494 | ], 1495 | "source": [ 1496 | "# get testing audio and transcript from dataset\n", 1497 | "index = np.random.randint(train_data.shape[0])\n", 1498 | "data = train_data.iloc[index]\n", 1499 | "test_file = data[0]\n", 1500 | 
"test_transcript = data[1]\n", 1501 | "# Audio file\n", 1502 | "print(\"Audio File:\",test_file)\n", 1503 | "# ground truth\n", 1504 | "print(\"Audio Transcription:\", test_transcript)\n", 1505 | "print(\"Transcript length:\",len(test_transcript))" 1506 | ] 1507 | }, 1508 | { 1509 | "cell_type": "code", 1510 | "execution_count": 14, 1511 | "metadata": { 1512 | "colab": { 1513 | "base_uri": "https://localhost:8080/", 1514 | "height": 87 1515 | }, 1516 | "colab_type": "code", 1517 | "id": "moqXWTQVvdxC", 1518 | "outputId": "0645ce28-1da9-447e-cc9d-93a4f57096c8" 1519 | }, 1520 | "outputs": [ 1521 | { 1522 | "name": "stdout", 1523 | "output_type": "stream", 1524 | "text": [ 1525 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/backend.py:5811: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.\n", 1526 | "Instructions for updating:\n", 1527 | "Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.\n" 1528 | ] 1529 | } 1530 | ], 1531 | "source": [ 1532 | "# predict labels\n", 1533 | "pred= pipeline1.predict(test_file)" 1534 | ] 1535 | }, 1536 | { 1537 | "cell_type": "code", 1538 | "execution_count": 15, 1539 | "metadata": { 1540 | "colab": { 1541 | "base_uri": "https://localhost:8080/", 1542 | "height": 34 1543 | }, 1544 | "colab_type": "code", 1545 | "id": "oNvRyWq8weZs", 1546 | "outputId": "88732c3b-5412-4a84-bb25-d517d4251a8c" 1547 | }, 1548 | "outputs": [ 1549 | { 1550 | "data": { 1551 | "text/plain": [ 1552 | "'WENT THROUGH THE PLAINS BUT WHEN THEY CAME NEAR THE MOUNTAINS'" 1553 | ] 1554 | }, 1555 | "execution_count": 15, 1556 | "metadata": { 1557 | "tags": [] 1558 | }, 1559 | "output_type": "execute_result" 1560 | } 1561 | ], 1562 | "source": [ 1563 | "pred[0].upper()" 1564 | ] 1565 | }, 1566 | { 1567 | "cell_type": "code", 1568 | "execution_count": null, 1569 | "metadata": { 1570 | "colab": {}, 1571 | "colab_type": "code", 1572 | "id": 
"AFMoK13mtR6V" 1573 | }, 1574 | "outputs": [], 1575 | "source": [] 1576 | } 1577 | ], 1578 | "metadata": { 1579 | "accelerator": "GPU", 1580 | "colab": { 1581 | "collapsed_sections": [], 1582 | "machine_shape": "hm", 1583 | "name": "DeepAsr-CTC_Pipeline.ipynb", 1584 | "provenance": [] 1585 | }, 1586 | "kernelspec": { 1587 | "display_name": "Python 3", 1588 | "language": "python", 1589 | "name": "python3" 1590 | }, 1591 | "language_info": { 1592 | "codemirror_mode": { 1593 | "name": "ipython", 1594 | "version": 3 1595 | }, 1596 | "file_extension": ".py", 1597 | "mimetype": "text/x-python", 1598 | "name": "python", 1599 | "nbconvert_exporter": "python", 1600 | "pygments_lexer": "ipython3", 1601 | "version": "3.7.5" 1602 | } 1603 | }, 1604 | "nbformat": 4, 1605 | "nbformat_minor": 4 1606 | } 1607 | --------------------------------------------------------------------------------