├── deepasr
├── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── utils.cpython-36.pyc
│ │ ├── utils.cpython-37.pyc
│ │ ├── __init__.cpython-36.pyc
│ │ └── __init__.cpython-37.pyc
│ ├── getmeta.py
│ └── utils.py
├── vocab
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── alphabet.cpython-36.pyc
│ │ └── alphabet.cpython-37.pyc
│ ├── alphabet-en.txt
│ └── alphabet.py
├── evaluate
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── distance.cpython-37.pyc
│ │ └── evaluate.cpython-37.pyc
│ ├── evaluate.py
│ ├── activations.py
│ └── distance.py
├── decoder
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── decoder.cpython-36.pyc
│ │ └── decoder.cpython-37.pyc
│ └── decoder.py
├── augmentation
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── augmentation.cpython-36.pyc
│ │ ├── augmentation.cpython-37.pyc
│ │ ├── spec_augment.cpython-36.pyc
│ │ └── spec_augment.cpython-37.pyc
│ ├── augmentation.py
│ └── spec_augment.py
├── __pycache__
│ └── __init__.cpython-37.pyc
├── features
│ ├── __pycache__
│ │ ├── mfcc.cpython-36.pyc
│ │ ├── mfcc.cpython-37.pyc
│ │ ├── sigproc.cpython-36.pyc
│ │ ├── sigproc.cpython-37.pyc
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── filter_banks.cpython-36.pyc
│ │ ├── filter_banks.cpython-37.pyc
│ │ ├── get_features.cpython-37.pyc
│ │ ├── spectrogram.cpython-36.pyc
│ │ ├── spectrogram.cpython-37.pyc
│ │ ├── feature_extractor.cpython-36.pyc
│ │ └── feature_extractor.cpython-37.pyc
│ ├── __init__.py
│ ├── filter_banks.py
│ ├── get_features.py
│ ├── feature_extractor.py
│ ├── spectrogram.py
│ ├── sigproc.py
│ └── mfcc.py
├── model
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── deepspeech2.cpython-37.pyc
│ │ └── deepasrnetwork1.cpython-37.pyc
│ ├── __init__.py
│ ├── compilemodel.py
│ ├── deepasrnetwork1.py
│ └── deepspeech2.py
├── pipeline
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── pipeline.cpython-36.pyc
│ │ ├── pipeline.cpython-37.pyc
│ │ ├── ctc_pipeline.cpython-36.pyc
│ │ ├── ctc_pipeline.cpython-37.pyc
│ │ └── get_pipeline.cpython-37.pyc
│ ├── __init__.py
│ ├── pipeline.py
│ ├── get_pipeline.py
│ └── ctc_pipeline.py
└── __init__.py
├── setup.cfg
├── MANIFEST.in
├── setup.py
├── app.py
├── README.md
├── LICENSE
└── DeepAsr_CTC_Pipeline.ipynb
/deepasr/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import *
2 |
--------------------------------------------------------------------------------
/deepasr/vocab/__init__.py:
--------------------------------------------------------------------------------
1 | from .alphabet import Alphabet
2 |
--------------------------------------------------------------------------------
/deepasr/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluate import calculate_error_rates
2 |
--------------------------------------------------------------------------------
/deepasr/decoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .decoder import Decoder, GreedyDecoder, BeamSearchDecoder
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [egg_info]
2 | tag_build =
3 | tag_date = 0
4 | [metadata]
5 | description-file = README.md
--------------------------------------------------------------------------------
/deepasr/augmentation/__init__.py:
--------------------------------------------------------------------------------
1 | from .augmentation import Augmentation
2 | from .spec_augment import SpecAugment
3 |
--------------------------------------------------------------------------------
/deepasr/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/utils/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/mfcc.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/mfcc.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/mfcc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/mfcc.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/model/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/model/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/vocab/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/vocab/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/vocab/__pycache__/alphabet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/alphabet.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/vocab/__pycache__/alphabet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/vocab/__pycache__/alphabet.cpython-37.pyc
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include app.py
4 |
5 | include deepasr/vocab/*.txt # Alphabets
6 | recursive-include deepasr *.py
--------------------------------------------------------------------------------
/deepasr/decoder/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/decoder/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/decoder/__pycache__/decoder.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/decoder.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/decoder/__pycache__/decoder.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/decoder/__pycache__/decoder.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/sigproc.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/sigproc.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/sigproc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/sigproc.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/evaluate/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/evaluate/__pycache__/distance.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/distance.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/evaluate/__pycache__/evaluate.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/evaluate/__pycache__/evaluate.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/model/__pycache__/deepspeech2.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/deepspeech2.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/pipeline.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/pipeline.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/pipeline.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/pipeline.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/filter_banks.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/filter_banks.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/filter_banks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/filter_banks.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/get_features.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/get_features.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/spectrogram.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/spectrogram.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/spectrogram.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/spectrogram.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .compilemodel import compile_model
2 | from .deepspeech2 import get_deepspeech2
3 | from .deepasrnetwork1 import get_deepasrnetwork1
4 |
--------------------------------------------------------------------------------
/deepasr/model/__pycache__/deepasrnetwork1.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/model/__pycache__/deepasrnetwork1.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/ctc_pipeline.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__pycache__/get_pipeline.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/pipeline/__pycache__/get_pipeline.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/augmentation.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/augmentation.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/augmentation.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/augmentation.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/spec_augment.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/spec_augment.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/augmentation/__pycache__/spec_augment.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/augmentation/__pycache__/spec_augment.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/feature_extractor.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/feature_extractor.cpython-36.pyc
--------------------------------------------------------------------------------
/deepasr/features/__pycache__/feature_extractor.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scionoftech/DeepAsr/HEAD/deepasr/features/__pycache__/feature_extractor.cpython-37.pyc
--------------------------------------------------------------------------------
/deepasr/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline import Pipeline
2 | from .ctc_pipeline import CTCPipeline
3 | from .get_pipeline import load
4 | # from .get_pipeline import load_checkpoint
5 |
--------------------------------------------------------------------------------
/deepasr/augmentation/augmentation.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import numpy as np
3 |
4 |
class Augmentation:
    """Callable interface for batch-level feature augmentation.

    The class does not use ``abc.ABCMeta``, so ``abc.abstractmethod`` is only
    a marker here: the class can still be instantiated and the default
    ``__call__`` simply returns ``None``.
    """

    @abc.abstractmethod
    def __call__(self, batch_features: np.ndarray) -> np.ndarray:
        """Return the augmented version of ``batch_features``."""
10 |
--------------------------------------------------------------------------------
/deepasr/features/__init__.py:
--------------------------------------------------------------------------------
1 | from .feature_extractor import FeaturesExtractor
2 | from .filter_banks import FilterBanks
3 | from .spectrogram import Spectrogram
4 | from . import mfcc
5 | from . import sigproc
6 | from .get_features import preprocess
7 |
--------------------------------------------------------------------------------
/deepasr/__init__.py:
--------------------------------------------------------------------------------
1 | from . import augmentation
2 | from . import decoder
3 | from . import evaluate
4 | from . import features
5 | from . import model
6 | from . import pipeline
7 | from . import utils
8 | from . import vocab
9 |
10 | # Version of the deepasr package
11 | __version__ = "0.1.1"
12 |
--------------------------------------------------------------------------------
/deepasr/utils/getmeta.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from tinytag import TinyTag
3 |
4 |
5 | # https://pypi.org/project/tinytag/
6 |
def get_file_tags(audio_file):
    """Print basic metadata of ``audio_file`` as a ``|``-separated line.

    The metadata is read with ``TinyTag.get`` and printed in this order:
    file size, audio offset, bitrate, channels, duration, sample rate and
    audio offset again.

    :param audio_file: path of the audio file handed to ``TinyTag.get``.
    """
    tag = TinyTag.get(audio_file)
    # The second field used to be spelled `audio_offest`, which raised
    # AttributeError. The attribute is printed twice on purpose so the
    # number of output columns stays the same as before.
    print(tag.filesize, '|', tag.audio_offset, "|", tag.bitrate, "|", tag.channels, "|", tag.duration, "|",
          tag.samplerate, "|", tag.audio_offset)


if __name__ == "__main__":
    # Usage: python getmeta.py <audio_file>
    get_file_tags(sys.argv[1])
15 |
--------------------------------------------------------------------------------
/deepasr/vocab/alphabet-en.txt:
--------------------------------------------------------------------------------
1 | #
2 | # Alphabet is the list of valid characters. There are two special characters:
3 | # - space: at the beginning
4 | # - blank: default added as the last char
5 | #
6 | # To comment out a line, use `#`
7 | #
8 |
9 | a
10 | b
11 | c
12 | d
13 | e
14 | f
15 | g
16 | h
17 | i
18 | j
19 | k
20 | l
21 | m
22 | n
23 | o
24 | p
25 | q
26 | r
27 | s
28 | t
29 | u
30 | v
31 | w
32 | x
33 | y
34 | z
35 | '
36 |
37 | # End of vocabulary
38 | # The last (non-comment) blank line represents the blank token
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
# The README doubles as the long description of the distribution.
# An explicit encoding keeps the build independent of the platform locale.
with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setuptools.setup(
    name="deepasr",
    version="0.1.2",
    author="Sai Kumar Yava",
    author_email="saikumar.geek@gmail.com",
    description="Keras(Tensorflow) implementations of Automatic Speech Recognition",
    long_description=long_description,
    long_description_content_type='text/markdown',
    url="https://github.com/scionoftech/DeepAsr",
    include_package_data=True,
    # Discover `deepasr` and all of its subpackages (augmentation, decoder,
    # evaluate, features, model, pipeline, utils, vocab). Listing only
    # 'deepasr' leaves out the subpackages that deepasr/__init__.py imports.
    packages=setuptools.find_packages(),
    keywords=['deepspeech', 'asr', 'speech recognition', 'speech to text'],
    license='GNU',
    install_requires=['tensorflow>=2.0', 'pandas', 'tables', 'scipy', 'librosa'],
    python_requires='>=3.6',
)
22 |
--------------------------------------------------------------------------------
/deepasr/features/filter_banks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .mfcc import fbank
3 | from . import feature_extractor
4 |
5 |
class FilterBanks(feature_extractor.FeaturesExtractor):
    """Features extractor producing log filter-bank energies via ``fbank``."""

    def __init__(self, features_num: int, samplerate: int = 16000, is_standardization=True, **kwargs):
        """Store the extraction settings.

        :param features_num: passed to ``fbank`` as ``nfilt``.
        :param samplerate: passed to ``fbank`` as ``samplerate``.
        :param is_standardization: when true, the log features are passed
            through ``standardize`` before being returned.
        :param kwargs: extra keyword arguments forwarded verbatim to ``fbank``.
        """
        self.samplerate = samplerate
        self.params = kwargs
        self.is_standardization = is_standardization
        self.features_num = features_num

    def make_features(self, audio: np.ndarray) -> np.ndarray:
        """Compute the log filter-bank features of a single audio signal.

        The signal is normalized as float32, rescaled to the int16 range and
        cast to int16 before it is handed to ``fbank``.
        """
        normalized = self.normalize(audio.astype(np.float32))
        pcm = (normalized * np.iinfo(np.int16).max).astype(np.int16)
        # fbank returns the filter-bank features and the frame energies;
        # only the former are used.
        feat, _energy = fbank(
            pcm, nfilt=self.features_num, samplerate=self.samplerate, **self.params
        )
        log_features = np.log(feat)
        if self.is_standardization:
            return self.standardize(log_features)
        return log_features
24 |
--------------------------------------------------------------------------------
/deepasr/features/get_features.py:
--------------------------------------------------------------------------------
1 | from .filter_banks import FilterBanks
2 | from .spectrogram import Spectrogram
3 |
4 |
def preprocess(feature_type: str = 'fbank', features_num: int = 161,
               samplerate: int = 16000,
               winlen: float = 0.02,
               winstep: float = 0.01,
               winfunc=None,
               is_standardization=True,
               pad_audio_to: int = 0):
    """Build the features extractor selected by ``feature_type``.

    This function does not extract features itself; it returns a configured
    extractor object.

    :param feature_type: ``'fbank'`` for ``FilterBanks`` or ``'spectrogram'``
        for ``Spectrogram``.
    :param features_num: forwarded as ``features_num``.
    :param samplerate: forwarded as ``samplerate``.
    :param winlen: forwarded as ``winlen``.
    :param winstep: forwarded as ``winstep``.
    :param winfunc: forwarded as ``winfunc``.
    :param is_standardization: forwarded to ``FilterBanks`` only.
    :param pad_audio_to: forwarded to ``Spectrogram`` only.
    :return: the configured ``FilterBanks`` or ``Spectrogram`` instance.
    :raises ValueError: if ``feature_type`` is not one of the supported names.
    """
    if feature_type == 'fbank':
        return FilterBanks(features_num=features_num, samplerate=samplerate, winlen=winlen,
                           winstep=winstep, winfunc=winfunc,
                           is_standardization=is_standardization)
    if feature_type == 'spectrogram':
        return Spectrogram(
            features_num=features_num,
            samplerate=samplerate,
            winlen=winlen,
            winstep=winstep,
            winfunc=winfunc,
            pad_audio_to=pad_audio_to
        )
    # Previously an unknown type fell through and returned None, which only
    # surfaced later as an unrelated error at the first use of the extractor.
    raise ValueError(
        "Unsupported feature_type {!r}; expected 'fbank' or 'spectrogram'".format(feature_type))
28 |
--------------------------------------------------------------------------------
/deepasr/pipeline/pipeline.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 | import numpy as np
4 | import pandas as pd
5 | from tensorflow import keras
6 | import sys
7 |
8 | sys.path.append("..")
9 | from deepasr.decoder import Decoder
10 | from deepasr.features import FeaturesExtractor
11 | from deepasr.vocab import Alphabet
12 |
13 |
class Pipeline:
    """Interface shared by the speech recognition pipelines.

    The members are tagged with ``abc.abstractmethod``, but the class does not
    use ``abc.ABCMeta``, so the tags are not enforced at instantiation time.
    Every default implementation returns ``None``.
    """

    @property
    @abc.abstractmethod
    def alphabet(self) -> Alphabet:
        """The alphabet used by the pipeline."""

    @property
    @abc.abstractmethod
    def features_extractor(self) -> FeaturesExtractor:
        """The features extractor used by the pipeline."""

    @property
    @abc.abstractmethod
    def model(self) -> keras.Model:
        """The Keras model used by the pipeline."""

    @property
    @abc.abstractmethod
    def decoder(self) -> Decoder:
        """The decoder used by the pipeline."""

    @abc.abstractmethod
    def fit(self,
            train_dataset: pd.DataFrame,
            val_dataset: pd.DataFrame,
            prepared_features=False,
            **kwargs) -> keras.callbacks.History:
        """Train on ``train_dataset`` / ``val_dataset`` and return the history."""

    @abc.abstractmethod
    def predict(self, batch_audio: List[np.ndarray], **kwargs) -> List[str]:
        """Return one transcription per item of ``batch_audio``."""

    @abc.abstractmethod
    def save(self, directory: str):
        """Persist the pipeline into ``directory``."""
51 |
--------------------------------------------------------------------------------
/deepasr/pipeline/get_pipeline.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | from deepasr.utils import load_data
4 | from deepasr.pipeline import CTCPipeline
5 | from deepasr.model import compile_model
6 |
7 |
def load(directory: str):
    """Rebuild a ``CTCPipeline`` from the artifacts stored in ``directory``.

    The ``*.bin`` files are read with ``load_data``, the network comes from
    ``network.h5`` and the weights of the compiled model from
    ``model_weights.h5``.

    :param directory: folder containing the saved pipeline artifacts.
    :return: the reconstructed ``CTCPipeline``.
    """

    def artifact(filename: str) -> str:
        # Every artifact lives directly inside `directory`.
        return os.path.join(directory, filename)

    label_len = load_data(artifact('label_len.bin'))
    optimizer = load_data(artifact('optimizer.bin'))
    network = tf.keras.models.load_model(artifact('network.h5'))

    # Wrap the bare network with the CTC training graph, then restore weights.
    compiled = compile_model(network, optimizer, label_len)
    compiled.load_weights(artifact('model_weights.h5'))

    alphabet = load_data(artifact('alphabet.bin'))
    decoder = load_data(artifact('decoder.bin'))
    features_extractor = load_data(artifact('feature_extractor.bin'))
    multi_gpu_flag = load_data(artifact('multi_gpu_flag.bin'))
    sample_rate = load_data(artifact('sample_rate.bin'))
    mono = load_data(artifact('mono.bin'))

    return CTCPipeline(
        alphabet=alphabet,
        features_extractor=features_extractor,
        model=compiled,
        optimizer=optimizer,
        decoder=decoder,
        sample_rate=sample_rate,
        mono=mono,
        label_len=label_len,
        multi_gpu=multi_gpu_flag,
        temp_model=network,
    )
31 |
--------------------------------------------------------------------------------
/deepasr/model/compilemodel.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import *
2 | from tensorflow.keras.models import Model
3 | import tensorflow.keras.backend as K
4 | import logging
5 |
6 | logger = logging.getLogger('asr.pipeline')
7 |
8 |
def ctc_loss(args):
    """Compute the CTC batch cost through ``K.ctc_batch_cost``.

    :param args: sequence ``(y_pred, labels, input_length, label_length)``.
    :return: the value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # ctc_batch_cost takes the labels first and the predictions second.
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)
13 |
14 |
def compile_model(_model, _optimizer, label_len=None):
    """Wrap ``_model`` with a CTC loss layer and compile it for training.

    :param _model: network whose first input and first output are reused.
    :param _optimizer: optimizer handed to ``Model.compile``.
    :param label_len: length used for the shape of the labels input.
    :return: the compiled model, named ``"DeepAsr"``, whose single output is
        the ``'ctc'`` Lambda layer.
    """
    network_input = _model.inputs[0]
    network_output = _model.outputs[0]

    # Ground truth transcriptions compared with the network output in training.
    labels = Input(name='the_labels', shape=[label_len], dtype='float32')
    # Length of every sample in the network output.
    input_length = Input(name='input_length', shape=[1], dtype='float32')
    # Length of every sample in the ground truth.
    label_length = Input(name='label_length', shape=[1], dtype='float32')

    ctc_layer = Lambda(ctc_loss, output_shape=(1,), name='ctc')
    loss_output = ctc_layer([network_output, labels, input_length, label_length])

    trainable = Model(
        inputs=[network_input, labels, input_length, label_length],
        outputs=loss_output,
        name="DeepAsr",
    )
    # The 'ctc' layer already outputs the loss value, so the Keras loss
    # function just passes the prediction through.
    trainable.compile(
        loss={'ctc': lambda y_true, y_pred: y_pred},
        optimizer=_optimizer,
        metrics=['accuracy'],
    )

    logger.info("Model is successfully compiled")
    return trainable
36 |
--------------------------------------------------------------------------------
/deepasr/decoder/decoder.py:
--------------------------------------------------------------------------------
1 | import abc
2 | # import itertools
3 | from typing import List
4 | import numpy as np
5 | from tensorflow.keras import backend as K
6 |
7 |
8 | # https://www.tensorflow.org/api_docs/python/tf/keras/backend/ctc_decode
9 |
class Decoder:
    """Callable interface turning a batch of logits into decoded sequences.

    The class does not use ``abc.ABCMeta``, so ``abc.abstractmethod`` is only
    a marker here and the default ``__call__`` returns ``None``.
    """

    @abc.abstractmethod
    def __call__(self, batch_logits: np.ndarray, input_length: int) -> List[np.ndarray]:
        """Decode ``batch_logits`` given ``input_length``."""
15 |
16 |
class GreedyDecoder:
    """Decoder built on ``K.ctc_decode`` with ``greedy=True``."""

    def __call__(self, batch_logits: np.ndarray, input_length: int) -> List[np.ndarray]:
        """Decode the best guess from the logits using the greedy algorithm.

        :param batch_logits: logits handed to ``K.ctc_decode``.
        :param input_length: passed to ``K.ctc_decode`` as ``[input_length]``.
        :return: a one-element list holding the flattened decoded labels.
        """
        decoded_paths = K.ctc_decode(batch_logits, [input_length], greedy=True)[0]
        best_path = K.eval(decoded_paths[0])
        # Flatten to 1-D and rebuild the array from a plain Python list.
        labels = np.array(best_path.flatten().tolist())
        return [labels]
29 |
30 |
class BeamSearchDecoder:
    """Decoder built on ``K.ctc_decode`` with ``greedy=False``."""

    def __init__(self, beam_width: int, top_paths: int):
        """Store the ``beam_width`` and ``top_paths`` forwarded to ``K.ctc_decode``."""
        self.top_paths = top_paths
        self.beam_width = beam_width

    def __call__(self, batch_logits: np.ndarray, input_length: int, **kwargs) -> List[
            np.ndarray]:
        """Decode the best guess from the logits using beam search.

        :param batch_logits: logits handed to ``K.ctc_decode``.
        :param input_length: passed to ``K.ctc_decode`` as ``[input_length]``.
        :param kwargs: accepted but not used.
        :return: a one-element list holding the flattened labels of the first
            returned path.
        """
        decoded_paths = K.ctc_decode(
            batch_logits,
            [input_length],
            greedy=False,
            beam_width=self.beam_width,
            top_paths=self.top_paths,
        )[0]
        first_path = K.eval(decoded_paths[0])
        # Flatten to 1-D and rebuild the array from a plain Python list.
        labels = np.array(first_path.flatten().tolist())
        return [labels]
45 |
--------------------------------------------------------------------------------
/deepasr/features/feature_extractor.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List, Tuple
3 | import numpy as np
4 |
5 |
class FeaturesExtractor:
    """ Base class of the feature extractors.

    A subclass implements `make_features`; calling the instance converts a
    batch of audio signals into one zero-padded array of features. """

    # Class-level fallback, so that subclasses which define their own
    # `__init__` without calling `super().__init__()` still have the attribute.
    features_shape = None

    def __init__(self):
        # Was spelled `__index__`, so it never ran as the initializer.
        self.features_shape = None

    def __call__(self, batch_audio: List[np.ndarray]) -> np.ndarray:
        """ Extract features from every signal of the batch.

        The per-signal features are padded to the shape of the longest one
        (remembered in `self.features_shape`) and returned as float16.

        Raises:
            ValueError: if `batch_audio` is empty. """
        features = [self.make_features(audio) for audio in batch_audio]
        if not features:
            raise ValueError("batch_audio must contain at least one signal")
        self.features_shape = max(features, key=len).shape
        X = self.align(features, self.features_shape)
        return X.astype(np.float16)

    @abc.abstractmethod
    def make_features(self, audio: np.ndarray) -> np.ndarray:
        """ Convert one audio signal into its features array. """

    @staticmethod
    def standardize(features: np.ndarray) -> np.ndarray:
        """ Standardize globally, independently of features.

        A constant input (zero standard deviation) is only centered, which
        avoids the division by zero and the resulting NaN values. """
        mean = np.mean(features)
        std = np.std(features)
        if std == 0:
            return features - mean
        return (features - mean) / std

    @staticmethod
    def normalize(audio: np.ndarray):
        """ Scale the signal by the inverse of its peak absolute value. """
        # The small constant keeps the gain finite for an all-zero signal.
        gain = 1.0 / (np.max(np.abs(audio)) + 1e-5)
        return audio * gain

    @staticmethod
    def align(arrays: list, features_shape: Tuple, default=0) -> np.ndarray:
        """ Pad arrays along their first axis and stack them.

        Returns the single array of shape `(len(arrays), *features_shape)`
        in which the positions not covered by an array hold `default`. """
        X = np.full(shape=[len(arrays), *features_shape],
                    fill_value=default, dtype=float)
        for index, array in enumerate(arrays):
            X[index, :array.shape[0]] = array
        return X
46 |
--------------------------------------------------------------------------------
/deepasr/features/spectrogram.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | from . import sigproc
4 | from . import feature_extractor
5 |
6 |
class Spectrogram(feature_extractor.FeaturesExtractor):
    """ Log power spectrogram extractor built on the local `sigproc`
    routines. """

    def __init__(self,
                 features_num: int,
                 samplerate: int,
                 winlen: float,
                 winstep: float,
                 winfunc=None,
                 is_standardization=True,
                 pad_audio_to: int = 0):
        # `winlen` and `winstep` are multiplied by `samplerate`, which turns
        # them into a frame length and a frame step counted in samples.
        self.frame_len = int(winlen * samplerate)
        self.frame_step = int(winstep * samplerate)
        self.features_num = features_num
        self.winfunc = winfunc
        self.is_standardization = is_standardization
        self.pad_to = pad_audio_to

    def make_features(self, audio: np.ndarray) -> np.ndarray:
        """ Compute the log power spectrogram of one signal.

        The signal is peak-normalized, rescaled to int16, optionally padded,
        framed and transformed; only the first `features_num` columns are
        kept. """
        signal = self.normalize(audio.astype(np.float32))
        signal = (signal * np.iinfo(np.int16).max).astype(np.int16)
        if self.pad_to:
            signal = self.pad(signal)
        frames = sigproc.framesig(
            signal, self.frame_len, self.frame_step, self.winfunc
        )
        spectrum = sigproc.logpowspec(frames, self.frame_len, norm=1)
        spectrum = spectrum[:, :self.features_num]  # Cut high frequency part
        if self.is_standardization:
            return self.standardize(spectrum)
        return spectrum

    def pad(self, audio: np.ndarray) -> np.ndarray:
        """ Append zeros to the end of the signal.

        The number of added samples is derived from the frame count and
        `self.pad_to`. The original note says that padding is required when
        working with mixed precision. """
        frame_count = 1 + int((len(audio) - self.frame_len) // self.frame_step + 1)
        missing_frames = self.pad_to - frame_count % self.pad_to
        return np.pad(audio, (0, missing_frames * self.frame_step), mode='constant')
43 |
--------------------------------------------------------------------------------
/deepasr/model/deepasrnetwork1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow import keras
4 | from tensorflow.keras.models import Model
5 | from tensorflow.keras.layers import *
6 | from tensorflow.keras.mixed_precision import experimental as mixed_precision
7 |
8 |
def get_deepasrnetwork1(input_dim=None, output_dim=29,
                        is_mixed_precision=True, random_state=1) -> keras.Model:
    """
    Build the (uncompiled) DeepAsrNetwork1 acoustic model.

    input_dim: int, a multiple of 4 (size of the feature axis)
    output_dim: number of letters in the alphabet
    is_mixed_precision: when true, a global Keras precision policy is set
    random_state: seed passed to both NumPy and TensorFlow

    """
    if is_mixed_precision:
        # NOTE(review): the policy name used here is 'float32', not a mixed
        # policy such as 'mixed_float16' - confirm that this is intended.
        policy = mixed_precision.Policy('float32')
        mixed_precision.set_policy(policy)

    np.random.seed(random_state)
    tf.random.set_seed(random_state)

    # the input: variable number of time steps, `input_dim` features each
    input_data = Input(name='the_input', shape=(None, input_dim), dtype='float32')

    # Batch normalize
    bn1 = BatchNormalization(axis=-1, name='BN_1')(input_data)

    # 1D Convs; 'valid' padding with kernel_size=5 drops 4 time steps per layer
    conv = Conv1D(filters=220, kernel_size=5, strides=1, padding='valid', activation='relu', name='Conv1D_1')(bn1)
    conv = BatchNormalization(name="CNBN_1")(conv)
    conv1 = Conv1D(filters=220, kernel_size=5, strides=1, padding='valid', activation='relu', name='Conv1D_2')(conv)
    conv1 = BatchNormalization(name="CNBN_2")(conv1)

    # RNN: two GRUs over the same input, the second one reads it backwards
    gru_1 = GRU(512, return_sequences=True, name='gru_1')(conv1)
    # NOTE(review): Keras documents `go_backwards=True` as returning the
    # reversed sequence - confirm whether the output of gru_2 has to be
    # reversed in time before it is concatenated with gru_1.
    gru_2 = GRU(512, return_sequences=True, go_backwards=True, name='gru_2')(conv1)

    # merge the two GRU outputs
    merged = concatenate([gru_1, gru_2])
    # Batch normalize
    bn2 = BatchNormalization(axis=-1, name="BN_2")(merged)

    # Per time step: a dense projection followed by a softmax over the labels
    dense = TimeDistributed(Dense(30))(bn2)
    y_pred = TimeDistributed(Dense(output_dim, activation='softmax', name='y_pred'), name='the_output')(dense)

    model = Model(inputs=input_data, outputs=y_pred)

    return model
51 |
--------------------------------------------------------------------------------
/deepasr/augmentation/spec_augment.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | import numpy as np
3 |
4 |
class SpecAugment:
    """ SpecAugment: A Simple Data Augmentation Method.

    Random blocks of frequency channels and of time steps are overwritten
    with the per-channel mean of the sample. """

    def __init__(self,
                 F: int = None,
                 mf: int = None,
                 Tmin: int = None,
                 Tmax: int = None,
                 mt: int = None):
        """ Store the masking parameters.

        F: maximal width of one frequency mask
        mf: number of frequency masks
        Tmin, Tmax: minimal and maximal length of one time mask
        mt: number of time masks

        Frequency masking is applied only if both `F` and `mf` are truthy;
        time masking only if both `Tmax` and `mt` are truthy. """
        self.F = F
        self.mf = mf
        self.Tmin = Tmin
        self.Tmax = Tmax
        self.mt = mt

    def __call__(self, batch_features: np.ndarray) -> np.ndarray:
        """ Mask every sample of the batch and stack the results. """
        return np.stack([self.mask_features(features) for features in batch_features], axis=0)

    def mask_features(self, features: np.ndarray) -> np.ndarray:
        """ Return a masked copy of one `(time, channels)` array. """
        features = features.copy()  # never modify the caller's array
        time, channels = features.shape
        means = features.mean(axis=0)  # The mean should be zero if features are normalized
        if self.F and self.mf:
            features = self.mask_frequencies(features, means, channels, self.F, self.mf)
        if self.Tmax and self.mt:
            features = self.mask_time(features, means, time, (self.Tmin, self.Tmax), self.mt)
        return features

    @staticmethod
    def mask_frequencies(features: np.ndarray, means: np.ndarray, channels: int, F: int, mf: int):
        """ Overwrite `mf` random channel blocks (in place) with the means.

        `np.random.randint(low, high + 1)` replaces the deprecated
        `np.random.random_integers(low, high)`; both include `high`. """
        # Clamped, so that an input with fewer than F channels does not raise.
        last_start = max(channels - F, 0)
        for _ in range(mf):
            f = np.random.randint(0, F + 1)
            f0 = np.random.randint(0, last_start + 1)
            features[:, f0:f0 + f] = means[f0:f0 + f]
        return features

    @staticmethod
    def mask_time(features: np.ndarray, means: np.ndarray, time: int, T_range: Tuple[int, int], mt: int):
        """ Overwrite `mt` random blocks of time steps (in place) with the
        means. A missing lower bound (`None`) is treated as 0. """
        Tmin, Tmax = T_range
        if Tmin is None:
            Tmin = 0
        # Clamped, so that an input shorter than Tmax does not raise.
        last_start = max(time - Tmax, 0)
        for _ in range(mt):
            t = np.random.randint(Tmin, Tmax + 1)
            t0 = np.random.randint(0, last_start + 1)
            features[t0:t0 + t, :] = means
        return features
49 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import tensorflow as tf
4 | import deepasr as asr
5 |
6 |
7 | # get CTCPipeline
def get_config(feature_type: str = 'spectrogram', multi_gpu: bool = False):
    """ Assemble the CTC pipeline: features extractor, alphabet, model,
    optimizer and decoder.

    feature_type: name of the features passed to `asr.features.preprocess`
    multi_gpu: forwarded to the `CTCPipeline` constructor

    Returns the configured (untrained) pipeline. """
    # audio feature extractor
    # NOTE(review): winstep (0.025) is larger than winlen (0.02) - confirm
    # that the gap between consecutive frames is intended.
    features_extractor = asr.features.preprocess(feature_type=feature_type, features_num=161,
                                                 samplerate=16000,
                                                 winlen=0.02,
                                                 winstep=0.025,
                                                 winfunc=np.hanning)

    # input label encoder
    alphabet_en = asr.vocab.Alphabet(lang='en')
    # training model
    model = asr.model.get_deepasrnetwork1(
        input_dim=161,
        output_dim=29,
        is_mixed_precision=True
    )
    # model optimizer; `learning_rate` replaces the deprecated alias `lr`
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=1e-4,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-8
    )
    # output label decoder
    decoder = asr.decoder.GreedyDecoder()
    # CTCPipeline
    pipeline = asr.pipeline.ctc_pipeline.CTCPipeline(
        alphabet=alphabet_en, features_extractor=features_extractor, model=model, optimizer=optimizer, decoder=decoder,
        sample_rate=16000, mono=True, multi_gpu=multi_gpu
    )
    return pipeline
39 |
40 |
def run():
    """ Train the model on `train_data.csv`, save the pipeline to
    `./checkpoints` and return what `pipeline.fit` returned. """
    training_frame = pd.read_csv('train_data.csv')

    asr_pipeline = get_config(feature_type='fbank', multi_gpu=False)

    # train asr model (alternative kept for reference:
    # asr_pipeline.fit_generator(train_dataset=training_frame, batch_size=32, epochs=500))
    training_history = asr_pipeline.fit(train_dataset=training_frame, batch_size=128, epochs=500)

    asr_pipeline.save('./checkpoints')

    return training_history
54 |
55 |
def test_model(test_data=None):
    """ Print the reference transcript and the prediction of the first test
    sample, using the pipeline saved in `checkpoints`.

    test_data: frame with the `path` and `transcripts` columns. It used to
        be overwritten unconditionally; now `test_data.csv` is read only
        when the argument is omitted (`None`). """
    if test_data is None:
        test_data = pd.read_csv('test_data.csv')
    pipeline = asr.pipeline.load('checkpoints')
    print("Truth:", test_data['transcripts'].to_list()[0])
    print("Prediction", pipeline.predict(test_data['path'].to_list()[0]))
61 |
62 |
if __name__ == "__main__":
    # Executed as a script: run the training; the evaluation call is disabled.
    run()
    # test_model(test)
66 |
--------------------------------------------------------------------------------
/deepasr/evaluate/evaluate.py:
--------------------------------------------------------------------------------
1 | from typing import List, Iterable, Tuple, Union
2 | from collections import namedtuple
3 | import pandas as pd
4 | from . import distance
5 | # from .. import dataset
6 | from .. import pipeline
7 |
8 | Metric = namedtuple('Metric', ['transcript', 'prediction', 'wer', 'cer'])
9 |
10 |
def calculate_error_rates(ctc_pipeline: pipeline.Pipeline,
                          data: pd.DataFrame,
                          return_metrics: bool = False
                          ) -> Union[Tuple[float, float], pd.DataFrame]:
    """ Calculate base metrics: WER and CER.

    Every row of `data` (columns `path` and `transcripts`) is predicted by
    the pipeline and compared with its transcript. Returns the frame of
    per-sample metrics if `return_metrics` is set, otherwise the pair
    `(mean WER, mean CER)`. """
    collected = []
    audio_paths = data['path'].values
    references = data['transcripts'].values
    for audio_path, reference in zip(audio_paths, references):
        hypotheses = ctc_pipeline.predict(audio_path)
        collected.extend(get_metrics(sources=hypotheses,
                                     destinations=[reference]))
    frame = pd.DataFrame(collected)
    if return_metrics:
        return frame
    return frame.wer.mean(), frame.cer.mean()
24 |
25 |
def get_metrics(sources: List[str],
                destinations: List[str]) -> Iterable[Metric]:
    """ Calculate base metrics in one batch: WER and CER.

    Yields one `Metric(transcript, prediction, wer, cer)` per pair, where
    `sources` are the predictions and `destinations` the references. Both
    rates are edit distances divided by the length of the reference. """
    for hypothesis, reference in zip(sources, destinations):
        reference_words = reference.split()
        word_errors = distance.edit_distance(hypothesis.split(),
                                             reference_words)[0]
        word_rate = word_errors / len(reference_words)

        char_errors = distance.edit_distance(list(hypothesis),
                                             list(reference))[0]
        char_rate = char_errors / len(reference)
        yield Metric(reference, hypothesis, word_rate, char_rate)
38 |
39 |
def get_cer(source: str, destination: str) -> float:
    """ Character error rate: the edit distance between the characters of
    `source` and `destination`, divided by the length of `destination`. """
    char_errors = distance.edit_distance(list(source), list(destination))[0]
    return char_errors / len(destination)
46 |
47 |
def get_wer(source: str, destination: str) -> float:
    """ Word error rate: the edit distance between the whitespace-separated
    words of `source` and `destination`, divided by the number of words in
    `destination`. """
    reference_words = destination.split()
    word_errors = distance.edit_distance(source.split(), reference_words)[0]
    return word_errors / len(reference_words)
54 |
--------------------------------------------------------------------------------
/deepasr/evaluate/activations.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import operator
3 | from typing import Callable, List, Union, Tuple
4 | import h5py
5 | import numpy as np
6 | import pandas as pd
7 | import tensorflow as tf
8 | from tensorflow import keras
9 | from . import evaluate
10 | # from .. import dataset
11 | from .. import pipeline
12 | from .. import utils
13 |
14 |
def save_metrics_and_activations(pipeline: pipeline.Pipeline,
                                 data: pd.DataFrame,
                                 store_path: str,
                                 prepared_features: bool = False,
                                 return_metrics: bool = False
                                 ) -> Union[Tuple[float, float], pd.DataFrame]:
    """ Evaluate every sample of `data` and store the layer outputs together
    with the metrics in the HDF5 file `store_path`.

    pipeline: provides `model`, `features_extractor`, `decoder`, `alphabet`
    data: frame with the `path` and `transcripts` columns
    prepared_features: if set, the `path` values are used as features as is
    return_metrics: if set, return the frame of per-sample metrics instead
        of the pair `(mean WER, mean CER)` """
    columns = ['sample_id', 'transcript', 'prediction', 'wer', 'cer']
    references = pd.DataFrame(columns=columns).set_index('sample_id')
    get_activations = get_activations_function(pipeline.model)
    all_metrics = []  # metrics of every sample, not only of the last one

    with h5py.File(store_path, mode='w') as store:
        for audio, transcript in zip(data['path'].values, data['transcripts'].values):
            # NOTE(review): `read_audio` in utils/utils.py is declared as
            # (file_path, sample_rate, mono) - this one-argument call needs
            # the pipeline's audio settings; confirm where they are stored.
            features = audio if prepared_features else pipeline.features_extractor([utils.read_audio(audio)])
            *activations, y_hat = get_activations([features, 0])
            # NOTE(review): the decoders in decoder/decoder.py also take an
            # `input_length` argument - confirm and pass it here.
            decoded_labels = pipeline.decoder(y_hat)
            predictions = pipeline.alphabet.get_batch_transcripts(decoded_labels)
            # The transcript is wrapped in a list: `get_metrics` zips the two
            # sequences, so a bare string would be consumed char by char.
            batch_metrics = list(evaluate.get_metrics(sources=predictions,
                                                      destinations=[transcript]))

            save_in_store(store, [*activations, y_hat], batch_metrics, references)
            all_metrics.extend(batch_metrics)

    with pd.HDFStore(store_path, mode='r+') as store:
        store.put('references', references)
    # Explicit columns keep `.wer` / `.cer` available for an empty `data`.
    metrics = pd.DataFrame(all_metrics, columns=evaluate.Metric._fields)
    return metrics if return_metrics else (metrics.wer.mean(), metrics.cer.mean())
40 |
41 |
def get_activations_function(model: keras.Model) -> Callable:
    """ Build the function which returns the outputs of all layers but the
    first one in a single pass. Its inputs are the model input and the
    learning phase. """
    # NOTE(review): TensorFlow exposes `learning_phase` and `function` under
    # `tf.keras.backend`, not directly under `tf.keras` - verify these two
    # attribute lookups against the TensorFlow version in use.
    inputs = [model.input, tf.keras.learning_phase()]
    outputs = [layer.output for layer in model.layers][1:]
    return tf.keras.function(inputs, outputs)
47 |
48 |
def save_in_store(store: h5py.File,
                  layer_outputs: List[np.ndarray],
                  metrics: List[evaluate.Metric],
                  references: pd.DataFrame):
    """ Save batch data into HDF5 file.

    Every metric is appended to `references` under the next free row id, and
    the matching slice of each layer output is written to the dataset
    `outputs/<layer index>/<sample id>`. """
    for position, metric in enumerate(metrics):
        sample_id = len(references)  # next free row of the references frame
        references.loc[sample_id] = metric
        for layer_number, batch_output in enumerate(layer_outputs):
            store.create_dataset(f'outputs/{layer_number}/{sample_id}',
                                 data=batch_output[position])
60 |
--------------------------------------------------------------------------------
/deepasr/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import logging
4 | from functools import reduce
5 | from logging import Logger
6 | from typing import Any
7 | import numpy as np
8 | import librosa
9 | # from scipy.io import wavfile
10 | from tensorflow import keras
11 |
12 | # from google.cloud import storage
13 |
14 | logger = logging.getLogger('asr.utils')
15 |
16 |
def load_data(file_path: str):
    """ Read one pickled python object from the file and return it.

    WARNING: unpickling can execute arbitrary code, so use this only with
    files coming from a trusted source. """
    with open(file_path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload
21 |
22 |
def save_data(data: Any, file_path: str):
    """ Pickle the python object into the file (an existing file is
    overwritten). """
    with open(file_path, 'wb') as handle:
        pickle.dump(data, handle)
27 |
28 |
29 | # def download_from_bucket(bucket_name: str, remote_path: str, local_path: str):
30 | # """ Download the file from the public bucket. """
31 | # client = storage.Client.create_anonymous_client()
32 | # bucket = client.bucket(bucket_name)
33 | # blob = storage.Blob(remote_path, bucket)
34 | # blob.download_to_filename(local_path, client=client)
35 |
36 |
37 | # def maybe_download_from_bucket(bucket_name: str, remote_path: str, local_path: str):
38 | # """ Download file from the bucket if it does not exist. """
39 | # if os.path.isfile(local_path):
40 | # return
41 | # directory = os.path.dirname(local_path)
42 | # os.makedirs(directory, exist_ok=True)
43 | # logger.info('Downloading file from the bucket...')
44 | # download_from_bucket(bucket_name, remote_path, local_path)
45 |
46 |
def read_audio(file_path: str, sample_rate: int, mono: bool) -> np.ndarray:
    """ Load the audio file with librosa and return only the samples.

    `sample_rate` and `mono` are passed to librosa as `sr` and `mono`. """
    samples, _ = librosa.core.load(file_path, sr=sample_rate, mono=mono)
    return samples
52 |
53 |
def calculate_units(model: 'keras.Model') -> int:
    """ Calculate number of the model parameters.

    The sizes of all arrays returned by `model.get_weights()` are summed.
    `np.size` is used instead of multiplying the shape entries, so a scalar
    weight (shape `()`) counts as one parameter instead of raising. """
    return sum(int(np.size(parameters)) for parameters in model.get_weights())
60 |
61 |
def create_logger(file_path=None, level=20, name='asr') -> Logger:
    """ Create the logger and handlers both console and file.

    file_path: optional log file, opened in the 'w' mode (truncated)
    level: logging level, 20 is `logging.INFO`
    name: name of the logger

    Calling the function again for the same logger does not add the same
    handler twice, so the messages are not duplicated. """
    log = logging.getLogger(name)
    log.setLevel(level)
    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)-8s] [%(name)-20s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    # Exact type check: FileHandler is a subclass of StreamHandler.
    has_console = any(type(handler) is logging.StreamHandler
                      for handler in log.handlers)
    if not has_console:
        console = logging.StreamHandler()
        console.setFormatter(formatter)
        log.addHandler(console)  # handle all messages from logger
    if file_path:
        target = os.path.abspath(file_path)
        has_file = any(isinstance(handler, logging.FileHandler)
                       and handler.baseFilename == target
                       for handler in log.handlers)
        if not has_file:
            file_handler = logging.FileHandler(file_path, mode='w')
            file_handler.setFormatter(formatter)
            log.addHandler(file_handler)
    return log
78 |
--------------------------------------------------------------------------------
/deepasr/vocab/alphabet.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List
3 | import numpy as np
4 |
5 |
class Alphabet:
    """
    Read the alphabet file, which is the list of valid characters (one per
    line, lines starting with `#` are skipped). Labels are the positions of
    the characters in the file; the last character read is used as the
    blank token.

    This class is used to convert characters to labels and vice versa.
    """

    def __init__(self, file_path: str = None, lang: str = None):
        """ Load the bundled `alphabet-<lang>.txt` for `lang` 'en' or 'pl',
        otherwise the file given by `file_path`.

        Raises:
            ValueError: if neither a supported `lang` nor a `file_path` is
                given. """
        self.size = 0
        self.blank_token = None
        self._label_to_str = []
        self._str_to_label = {}
        if lang in ['en', 'pl']:
            directory = os.path.dirname(os.path.abspath(__file__))
            file_path = os.path.join(directory, f'alphabet-{lang}.txt')
        elif not file_path:
            raise ValueError("Either a supported `lang` ('en', 'pl') "
                             "or a `file_path` is required")
        self.process_alphabet_file(file_path)

    def __contains__(self, char: str) -> bool:
        """ Check if char is in the Alphabet. """
        return char in self._str_to_label

    def string_from_label(self, label: int) -> str:
        """ Convert label to string. """
        return self._label_to_str[label]

    def label_from_string(self, string: str) -> int:
        """ Convert string to label. """
        return self._str_to_label[string]

    def process_alphabet_file(self, file_path: str):
        """ Read the alphabet file and fill both lookup tables. """
        # Explicit encoding: the result must not depend on the locale.
        with open(file_path, encoding='utf-8') as file:
            for line in file:
                if line.startswith('#'):
                    continue
                # Char can contain more than one letter. Only the line ending
                # is removed, so a last line without the trailing newline is
                # not truncated (`line[:-1]` would cut its last character).
                char = line.rstrip('\n')
                self._label_to_str.append(char)
                self._str_to_label[char] = self.size
                self.size += 1
            # The last character read serves as the blank token
            self.blank_token = self.size - 1

    def get_batch_labels(self, transcripts: List[str]) -> np.ndarray:
        """ Convert batch transcripts to labels.

        Transcripts are lower-cased and stripped, characters outside of the
        alphabet are dropped, and shorter sequences are padded with the
        blank token to the length of the longest one. """
        batch_labels = [[self.label_from_string(c) for c in transcript.lower().strip() if c in self]
                        for transcript in transcripts]
        # `default` keeps an empty batch from raising in `max`
        max_len = max(map(len, batch_labels), default=0)
        default_value = self.blank_token
        for labels in batch_labels:
            remainder = [default_value] * (max_len - len(labels))
            labels.extend(remainder)
        return np.array(batch_labels)

    def get_batch_transcripts(self, sequences: List[np.ndarray]) -> List[str]:
        """ Convert label sequences to transcripts. The `-1` also means the
        blank tag """
        return [''.join(self.string_from_label(char_label)
                        for char_label in sequence
                        if char_label not in (-1, self.blank_token))
                for sequence in sequences]
72 |
73 |
if __name__ == "__main__":
    # Manual check: encode two sample transcripts with the English alphabet
    # and print the resulting (padded) label matrix.
    al = Alphabet(lang='en')
    labels = al.get_batch_labels(["Hi how are you", "i am vey well, what about you"])
    print(labels)
78 |
--------------------------------------------------------------------------------
/deepasr/evaluate/distance.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from typing import Tuple, List
3 | from collections import defaultdict
4 | import numpy as np
5 |
6 |
def edit_distance(source: List[str],
                  destination: List[str]) -> Tuple[int, np.ndarray, np.ndarray]:
    """
    Calculation of edit distance between two sequences.

    This is the Levenshtein distance with the substitution cost equals 1,
    computed iteratively over the full matrix: O(nm) time and space.

    Returns the distance (the bottom-right cell of the matrix), the matrix
    of distances, and the backtrace: a structured array which stores for
    every cell which operations ('del', 'sub', 'ins') reach the minimum and
    the substitution 'cost' of that cell.

    References:
        https://web.stanford.edu/class/cs124/lec/med.pdf
        https://www.python-course.eu/levenshtein_distance.php
    """
    rows = len(source) + 1
    cols = len(destination) + 1

    # The first column and the first row hold the distances from/to an
    # empty prefix.
    matrix = np.zeros([rows, cols])
    matrix[:, 0] = np.arange(0, rows)
    matrix[0, :] = np.arange(0, cols)

    cell_type = [('del', bool), ('sub', bool), ('ins', bool), ('cost', int)]
    backtrace = np.zeros_like(matrix, dtype=cell_type)
    backtrace[:, 0] = (True, False, False, 0)
    backtrace[0, :] = (False, False, True, 0)

    for x in range(1, rows):
        for y in range(1, cols):
            cost = 0 if source[x - 1] == destination[y - 1] else 1
            delete = matrix[x - 1][y] + 1
            insert = matrix[x][y - 1] + 1
            substitute = matrix[x - 1][y - 1] + cost
            best = min(delete, insert, substitute)
            matrix[x, y] = best
            backtrace[x, y] = (delete == best,
                               substitute == best,
                               insert == best,
                               cost)
    return matrix[rows - 1, cols - 1], matrix, backtrace
47 |
48 |
def simple_backtrace(backtrace: np.ndarray):
    """ Calculate the editing path via the backtrace.

    The walk starts in the bottom-right cell and ends in (0, 0). When
    several operations are allowed in a cell, insertion is preferred over
    substitution, and substitution over deletion. Returns the list of
    `(i, j, operation, cost)` tuples ordered from (0, 0) to the last cell. """
    # (row offset, column offset) of the move which undoes each operation
    moves = {'ins': (0, -1), 'sub': (-1, -1), 'del': (-1, 0)}
    row_count, column_count = backtrace.shape
    i, j = row_count - 1, column_count - 1
    path = [(i, j, 'sub', 0)]
    while (i, j) != (0, 0):
        delete, substitute, insert, cost = backtrace[i, j]
        if insert:
            operation = 'ins'
        elif substitute:
            operation = 'sub'
        elif delete:
            operation = 'del'
        else:
            raise KeyError("Backtrace matrix wrong defined")
        row_move, column_move = moves[operation]
        i, j = i + row_move, j + column_move
        path.append((i, j, operation, cost))
    path.reverse()
    return path
69 |
70 |
def decode_path(best_path: List[Tuple[int, int, str, int]],
                source: List[str],
                destination: List[str]):
    """ Collect all transformations needed to go from `source` to
    `destination`.

    Returns the deleted items, the inserted items, and the mapping from a
    target item to the list of wrong items it replaces. """
    to_delete = []
    to_insert = []
    to_substitute = defaultdict(list)
    for i, j, operation, cost in best_path:
        if operation == 'del':
            to_delete.append(source[i])
        elif operation == 'sub' and cost:
            # without cost sub operation indicates correctness
            to_substitute[destination[j]].append(source[i])
        elif operation == 'ins':
            to_insert.append(destination[j])
    return to_delete, to_insert, to_substitute
89 |
--------------------------------------------------------------------------------
/deepasr/model/deepspeech2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow import keras
4 | from tensorflow.keras.models import Model
5 | from tensorflow.keras.layers import *
6 | from tensorflow.keras.mixed_precision import experimental as mixed_precision
7 | # from tensorflow.keras.activations import relu
8 |
9 |
10 | # def clipped_relu(x):
11 | # return relu(x, max_value=20)
12 |
13 |
def get_deepspeech2(input_dim=None, output_dim=29,
                    is_mixed_precision=True, random_state=1) -> keras.Model:
    """
    Build the (uncompiled) Deep Speech 2 style acoustic model.

    input_dim: int, a multiple of 4 (size of the feature axis)
    output_dim: number of letters in the alphabet
    is_mixed_precision: when true, a global Keras precision policy is set
    random_state: seed passed to both NumPy and TensorFlow

    """
    if is_mixed_precision:
        # NOTE(review): the policy name used here is 'float32', not a mixed
        # policy such as 'mixed_float16' - confirm that this is intended.
        policy = mixed_precision.Policy('float32')
        mixed_precision.set_policy(policy)

    np.random.seed(random_state)
    tf.random.set_seed(random_state)

    # the input: variable number of time steps, `input_dim` features each
    input_data = Input(name='the_input', shape=(None, input_dim), dtype='float32')

    # Batch normalize
    bn1 = BatchNormalization(axis=-1, name='BN_1')(input_data)

    # 1D Convs, each but the last one followed by batch normalization
    conv1 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_1')(bn1)
    cbn1 = BatchNormalization(axis=-1, name='CBN_1')(conv1)
    conv2 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_2')(cbn1)
    cbn2 = BatchNormalization(axis=-1, name='CBN_2')(conv2)
    conv3 = Conv1D(512, 5, strides=1, activation='relu', name='Conv1D_3')(cbn2)

    # Batch normalize
    x = BatchNormalization(axis=-1, name='BN_2')(conv3)

    # Disabled variant: a stack of seven bidirectional SimpleRNN layers
    # birnn1 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_1'), merge_mode='sum')(bn2)
    # birnn2 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_2'), merge_mode='sum')(birnn1)
    # birnn3 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_3'), merge_mode='sum')(birnn2)
    # birnn4 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_4'), merge_mode='sum')(birnn3)
    # birnn5 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_5'), merge_mode='sum')(birnn4)
    # birnn6 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_6'), merge_mode='sum')(birnn5)
    # birnn7 = Bidirectional(SimpleRNN(1280, return_sequences=True, name='BiRNN_7'), merge_mode='sum')(birnn6)

    # BiRNNs: five bidirectional GRU layers with concatenated directions
    for i in [1, 2, 3, 4, 5]:
        recurrent = GRU(units=800,
                        activation='tanh',
                        recurrent_activation='sigmoid',
                        use_bias=True,
                        return_sequences=True,
                        reset_after=True,
                        name=f'gru_{i}')
        x = Bidirectional(recurrent,
                          name=f'bidirectional_{i}',
                          merge_mode='concat')(x)
        x = Dropout(rate=0.5)(x) if i < 5 else x  # Only between the layers, not after the last one

    # Batch normalize
    bn3 = BatchNormalization(axis=-1, name='BN_3')(x)

    # Per time step: a dense layer followed by a softmax over the labels
    dense = TimeDistributed(Dense(1024, activation='relu', name='FC1'))(bn3)
    y_pred = TimeDistributed(Dense(output_dim, activation='softmax', name='y_pred'), name='the_output')(dense)

    model = Model(inputs=input_data, outputs=y_pred)

    # Disabled: wrapping of the model with the CTC loss inputs and output.
    # # your ground truth data. The data you are going to compare with the model's outputs in training
    # labels = Input(name='the_labels', shape=[label_dim], dtype='float32')
    # # the length (in steps, or chars this case) of each sample (sentence) in the y_pred tensor
    # input_length = Input(name='input_length', shape=[1], dtype='float32')
    # # the length (in steps, or chars this case) of each sample (sentence) in the y_true
    # label_length = Input(name='label_length', shape=[1], dtype='float32')
    # output = Lambda(ctc_loss, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
    # model = Model(inputs=[input_data, labels, input_length, label_length], outputs=output, name="deepspeech2pro_v1")
    return model
85 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepAsr
2 | DeepAsr is an open-source Keras (TensorFlow) implementation of an end-to-end Automatic Speech Recognition (ASR) engine that supports multiple speech recognition architectures.
3 |
4 | Supported Asr Architectures:
5 | - Baidu's Deep Speech 2
6 | - DeepAsrNetwork1
7 |
8 | **Using DeepAsr you can**:
9 | - perform speech-to-text using pre-trained models
10 | - tune pre-trained models to your needs
11 | - create new models on your own
12 |
13 | **DeepAsr key features**:
14 | - **Multi GPU support**: You can do much more like distribute the training using the [Strategy](https://www.tensorflow.org/guide/distributed_training), or experiment with [mixed precision](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy) policy.
15 | - **CuDNN support**: Model using [CuDNNLSTM](https://keras.io/layers/recurrent/) implementation by NVIDIA Developers. CPU devices are also supported.
16 | - **DataGenerator**: Performs the feature extraction during model training, for large datasets.
17 |
18 | ## Installation
19 | You can use pip:
20 | ```bash
21 | pip install deepasr
22 | ```
23 |
24 | ## Getting started
25 | Speech recognition is a tough task. You don't need to know all the details to use one of the pretrained models.
26 | However, it is worth understanding the crucial conceptual components:
27 | - **Input**: Audio files (WAV or FLAC) with mono 16-bit 16 kHz (up to 5 seconds)
28 | - **FeaturesExtractor**: Convert audio files using MFCC Features or Spectrogram
29 | - **Model**: CTC model defined in [**Keras**](https://keras.io/) (references: [[1]](https://arxiv.org/abs/1412.5567), [[2]](https://arxiv.org/abs/1512.02595))
30 | - **Decoder**: Greedy or BeamSearch algorithms with the language model support decode a sequence of probabilities using Alphabet
31 | - **DataGenerator**: Stream data to the model via generator
32 | - **Callbacks**: Set of functions monitoring the training
33 |
34 | ```python
35 | import numpy as np
36 | import pandas as pd
37 | import tensorflow as tf
38 | import deepasr as asr
39 |
40 | # get CTCPipeline
41 | def get_config(feature_type: str = 'spectrogram', multi_gpu: bool = False):
42 | # audio feature extractor
43 | features_extractor = asr.features.preprocess(feature_type=feature_type, features_num=161,
44 | samplerate=16000,
45 | winlen=0.02,
46 | winstep=0.025,
47 | winfunc=np.hanning)
48 |
49 | # input label encoder
50 | alphabet_en = asr.vocab.Alphabet(lang='en')
51 | # training model
52 | model = asr.model.get_deepspeech2(
53 | input_dim=161,
54 | output_dim=29,
55 | is_mixed_precision=True
56 | )
57 | # model optimizer
58 | optimizer = tf.keras.optimizers.Adam(
59 | lr=1e-4,
60 | beta_1=0.9,
61 | beta_2=0.999,
62 | epsilon=1e-8
63 | )
64 | # output label deocder
65 | decoder = asr.decoder.GreedyDecoder()
66 | # decoder = asr.decoder.BeamSearchDecoder(beam_width=100, top_paths=1)
67 | # CTCPipeline
68 | pipeline = asr.pipeline.ctc_pipeline.CTCPipeline(
69 | alphabet=alphabet_en, features_extractor=features_extractor, model=model, optimizer=optimizer, decoder=decoder,
70 | sample_rate=16000, mono=True, multi_gpu=multi_gpu
71 | )
72 | return pipeline
73 |
74 |
75 | train_data = pd.read_csv('train_data.csv')
76 |
77 | pipeline = get_config(feature_type = 'fbank', multi_gpu=False)
78 |
79 | # train asr model
80 | history = pipeline.fit(train_dataset=train_data, batch_size=128, epochs=500)
81 | # history = pipeline.fit_generator(train_dataset = train_data, batch_size=32, epochs=500)
82 |
83 | pipeline.save('./checkpoint')
84 | ```
85 |
86 | A loaded pre-trained model has all the components. A prediction can be made just by calling `pipeline.predict()`.
87 |
88 | ```python
89 | import pandas as pd
90 | import deepasr as asr
91 | import numpy as np
92 | test_data = pd.read_csv('test_data.csv')
93 |
94 | # get testing audio and transcript from dataset
95 | index = np.random.randint(test_data.shape[0])
96 | data = test_data.iloc[index]
97 | test_file = data[0]
98 | test_transcript = data[1]
99 | # Test Audio file
100 | print("Audio File:",test_file)
101 | # Test Transcript
102 | print("Audio Transcript:", test_transcript)
103 | print("Transcript length:",len(test_transcript))
104 |
105 | pipeline = asr.pipeline.load('./checkpoint')
106 | print("Prediction", pipeline.predict(test_file))
107 | ```
108 |
109 | #### References
110 |
111 | The fundamental repositories:
112 | - Baidu - [DeepSpeech2 - A PaddlePaddle implementation of DeepSpeech2 architecture for ASR](https://github.com/PaddlePaddle/DeepSpeech)
113 | - NVIDIA - [Toolkit for efficient experimentation with Speech Recognition, Text2Speech and NLP](https://nvidia.github.io/OpenSeq2Seq)
114 | - TensorFlow - [The implementation of DeepSpeech2 model](https://github.com/tensorflow/models/tree/master/research/deep_speech)
115 | - Mozilla - [DeepSpeech - A TensorFlow implementation of Baidu's DeepSpeech architecture](https://github.com/mozilla/DeepSpeech)
116 | - Espnet - [End-to-End Speech Processing Toolkit](https://github.com/espnet/espnet)
117 | - Automatic Speech Recognition - [Distill the Automatic Speech Recognition research](https://github.com/rolczynski/Automatic-Speech-Recognition)
118 | - Python Speech Features - [Speech features for ASR including MFCCs and filterbank energies](https://github.com/jameslyons/python_speech_features)
--------------------------------------------------------------------------------
/deepasr/features/sigproc.py:
--------------------------------------------------------------------------------
1 | # This file includes routines for basic signal processing including framing and computing power spectra.
2 | # Author: James Lyons 2012
3 | import decimal
4 |
5 | import numpy
6 | import math
7 | import logging
8 |
9 |
def round_half_up(number):
    """Round ``number`` to the nearest integer, resolving ties away from zero.

    :param number: the value to round; it is converted with ``decimal.Decimal``.
    :returns: the rounded value as an ``int``.
    """
    unit = decimal.Decimal('1')
    rounded = decimal.Decimal(number).quantize(unit, rounding=decimal.ROUND_HALF_UP)
    return int(rounded)
12 |
13 |
def rolling_window(a, window, step=1):
    """Build sliding windows over the last axis of ``a`` using the stride trick.

    :param a: the numpy array to slide over.
    :param window: number of consecutive elements in each window.
    :param step: keep every ``step``-th entry along the first axis of the windowed result.
    :returns: an ``as_strided`` view whose trailing axis has length ``window``.
    """
    # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick
    last_axis_len = a.shape[-1]
    last_stride = a.strides[-1]
    windowed = numpy.lib.stride_tricks.as_strided(
        a,
        shape=a.shape[:-1] + (last_axis_len - window + 1, window),
        strides=a.strides + (last_stride,))
    return windowed[::step]
19 |
20 |
def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True):
    """Split a signal into overlapping, windowed frames.

    The signal is zero-padded at the end so that the last frame is complete.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame measured in samples.
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :param stride_trick: use stride trick to compute the rolling window and window multiplication faster
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    sig_len = len(sig)
    frame_len = int(round_half_up(frame_len))
    frame_step = int(round_half_up(frame_step))

    # A signal no longer than one frame still produces exactly one (padded) frame.
    if sig_len > frame_len:
        numframes = 1 + int(math.ceil((1.0 * sig_len - frame_len) / frame_step))
    else:
        numframes = 1

    padded_len = int((numframes - 1) * frame_step + frame_len)
    padsignal = numpy.concatenate((sig, numpy.zeros((padded_len - sig_len,))))

    if not stride_trick:
        # Explicit index matrix: row r holds the sample positions of frame r.
        within_frame = numpy.tile(numpy.arange(0, frame_len), (numframes, 1))
        frame_starts = numpy.tile(numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
        indices = numpy.array(within_frame + frame_starts, dtype=numpy.int32)
        frames = padsignal[indices]
        return frames * numpy.tile(winfunc(frame_len), (numframes, 1))

    # The 1-D window broadcasts across every row of the strided view.
    window = winfunc(frame_len)
    frames = rolling_window(padsignal, window=frame_len, step=frame_step)
    return frames * window
54 |
55 |
def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))):
    """Reassemble a signal from frames by overlap-add, undoing ``framesig``.

    :param frames: the array of frames.
    :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples.
    :param frame_len: length of each frame measured in samples.
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: a 1-D signal.
    """
    frame_len = round_half_up(frame_len)
    frame_step = round_half_up(frame_step)
    numframes, frame_width = numpy.shape(frames)[0], numpy.shape(frames)[1]
    assert frame_width == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'

    # Row r of ``indices`` lists the output positions covered by frame r.
    within_frame = numpy.tile(numpy.arange(0, frame_len), (numframes, 1))
    frame_starts = numpy.tile(numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
    indices = numpy.array(within_frame + frame_starts, dtype=numpy.int32)
    padlen = (numframes - 1) * frame_step + frame_len

    if siglen <= 0:
        siglen = padlen

    rec_signal = numpy.zeros((padlen,))
    window_correction = numpy.zeros((padlen,))
    win = winfunc(frame_len)

    for i, positions in enumerate(indices):
        # add a little bit so the correction is never zero
        window_correction[positions] = window_correction[positions] + win + 1e-15
        rec_signal[positions] = rec_signal[positions] + frames[i, :]

    # Normalise by the accumulated window weight at every sample.
    rec_signal = rec_signal / window_correction
    return rec_signal[0:siglen]
89 |
90 |
def magspec(frames, NFFT):
    """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
    """
    frame_len = numpy.shape(frames)[1]
    if frame_len > NFFT:
        # ``logging.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``logging.warning`` is the supported spelling.
        logging.warning(
            'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
            frame_len, NFFT)
    complex_spec = numpy.fft.rfft(frames, NFFT)
    return numpy.absolute(complex_spec)
104 |
105 |
def powspec(frames, NFFT):
    """Return the power spectrum of every frame: the squared magnitude spectrum scaled by 1/NFFT.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame.
    """
    magnitudes = magspec(frames, NFFT)
    scale = 1.0 / NFFT
    return scale * numpy.square(magnitudes)
114 |
115 |
def logpowspec(frames, NFFT, norm=1):
    """Return the power spectrum of every frame in decibels (10 * log10).

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame.
    """
    floor = 1e-30
    power = powspec(frames, NFFT)
    # Clamp tiny values so the logarithm below stays finite.
    power[power <= floor] = floor
    log_power = 10 * numpy.log10(power)
    if not norm:
        return log_power
    return log_power - numpy.max(log_power)
131 |
132 |
def preemphasis(signal, coeff=0.95):
    """Apply a first-order preemphasis filter: ``y[0] = x[0]``, ``y[n] = x[n] - coeff * x[n-1]``.

    :param signal: The signal to filter.
    :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
    :returns: the filtered signal.
    """
    first_sample = signal[0]
    differenced = signal[1:] - coeff * signal[:-1]
    return numpy.append(first_sample, differenced)
141 |
--------------------------------------------------------------------------------
/deepasr/features/mfcc.py:
--------------------------------------------------------------------------------
1 | # calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications
2 | # Author: James Lyons 2012
3 | from __future__ import division
4 | import numpy
5 | from . import sigproc
6 | from scipy.fftpack import dct
7 |
8 |
def calculate_nfft(samplerate, winlen):
    """Return the smallest power of two that is at least the number of samples in one window.

    An FFT shorter than the window drops samples and loses precision; an FFT
    longer than the window simply zero-pads the buffer, which is neutral in
    terms of frequency domain conversion.

    :param samplerate: The sample rate of the signal we are working with, in Hz.
    :param winlen: The length of the analysis window in seconds.
    :returns: the FFT size as an integer power of two.
    """
    samples_per_window = winlen * samplerate
    size = 1
    while size < samples_per_window:
        size <<= 1  # double until the whole window fits
    return size
25 |
26 |
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=None, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    The pipeline is: Mel filterbank energies -> log -> type-2 orthonormal DCT ->
    keep the first ``numcep`` coefficients -> lifter.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is None, which uses the calculate_nfft function to choose the smallest size that does not drop sample data.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    if not nfft:
        nfft = calculate_nfft(samplerate, winlen)

    filterbank_energies, frame_energy = fbank(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)

    log_energies = numpy.log(filterbank_energies)
    cepstra = dct(log_energies, type=2, axis=1, norm='ortho')[:, :numcep]
    cepstra = lifter(cepstra, ceplifter)

    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
54 |
55 |
def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
          winfunc=lambda x: numpy.ones((x,))):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """
    if not highfreq:
        highfreq = samplerate / 2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen * samplerate, winstep * samplerate, winfunc)
    pspec = sigproc.powspec(frames, nfft)

    # Exact zeros are replaced by machine epsilon so callers can safely take a log.
    eps = numpy.finfo(float).eps

    total_energy = numpy.sum(pspec, 1)  # total energy in each frame
    total_energy = numpy.where(total_energy == 0, eps, total_energy)

    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(pspec, filters.T)  # filterbank energies
    band_energy = numpy.where(band_energy == 0, eps, band_energy)

    return band_energy, total_energy
86 |
87 |
def logfbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
             nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
             winfunc=lambda x: numpy.ones((x,))):
    """Compute log Mel-filterbank energy features from an audio signal.

    This is the natural logarithm of the first value returned by ``fbank``;
    the per-frame total energy is discarded.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    band_energy, _ = fbank(signal, samplerate, winlen, winstep, nfilt, nfft,
                           lowfreq, highfreq, preemph, winfunc)
    return numpy.log(band_energy)
107 |
108 |
def ssc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
        nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
        winfunc=lambda x: numpy.ones((x,))):
    """Compute Spectral Subband Centroid features from an audio signal.

    For each filter the result is the frequency-weighted filterbank energy
    divided by the plain filterbank energy.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    if not highfreq:
        highfreq = samplerate / 2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen * samplerate, winstep * samplerate, winfunc)
    pspec = sigproc.powspec(frames, nfft)
    # if things are all zeros we get problems
    pspec = numpy.where(pspec == 0, numpy.finfo(float).eps, pspec)

    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(pspec, filters.T)  # filterbank energies

    # One row of bin frequencies (1 Hz .. samplerate/2), repeated for every frame.
    num_frames = numpy.size(pspec, 0)
    num_bins = numpy.size(pspec, 1)
    bin_freqs = numpy.tile(numpy.linspace(1, samplerate / 2, num_bins), (num_frames, 1))

    return numpy.dot(pspec * bin_freqs, filters.T) / band_energy
137 |
138 |
def hz2mel(hz):
    """Convert a frequency in Hertz to the Mel scale: ``2595 * log10(1 + hz / 700)``.

    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
    """
    scaled = hz / 700.
    return 2595 * numpy.log10(1 + scaled)
146 |
147 |
def mel2hz(mel):
    """Convert a Mel-scale value back to Hertz: ``700 * (10 ** (mel / 2595) - 1)``.

    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    """
    exponent = mel / 2595.0
    return 700 * (10 ** exponent - 1)
155 |
156 |
def get_filterbanks(nfilt=20, nfft=512, samplerate=16000, lowfreq=0, highfreq=None):
    """Build a bank of triangular filters whose edges are evenly spaced on the Mel scale.

    The filters are stored in the rows, the columns correspond to fft bins.

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the sample rate of the signal we are working with, in Hz. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    """
    if not highfreq:
        highfreq = samplerate / 2
    assert highfreq <= samplerate / 2, "highfreq is greater than samplerate/2"

    # nfilt triangles need nfilt + 2 edge points, evenly spaced in mels
    mel_edges = numpy.linspace(hz2mel(lowfreq), hz2mel(highfreq), nfilt + 2)
    # the edges are converted back to Hz and then to (floored) fft bin numbers
    bin_edges = numpy.floor((nfft + 1) * mel2hz(mel_edges) / samplerate)

    filters = numpy.zeros([nfilt, nfft // 2 + 1])
    for j in range(nfilt):
        left, center, right = bin_edges[j], bin_edges[j + 1], bin_edges[j + 2]
        # rising slope from the left edge up to the centre
        for i in range(int(left), int(center)):
            filters[j, i] = (i - left) / (center - left)
        # falling slope from the centre down to the right edge
        for i in range(int(center), int(right)):
            filters[j, i] = (right - i) / (right - center)
    return filters
186 |
187 |
def lifter(cepstra, L=22):
    """Apply a cepstral lifter to the matrix of cepstra. This has the effect of increasing the
    magnitude of the high frequency DCT coeffs.

    :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    :returns: the liftered cepstra, or ``cepstra`` itself when the lifter is disabled.
    """
    if not L > 0:
        # values of L <= 0, do nothing
        return cepstra

    _, ncoeff = numpy.shape(cepstra)
    coeff_index = numpy.arange(ncoeff)
    weights = 1 + (L / 2.) * numpy.sin(numpy.pi * coeff_index / L)
    return weights * cepstra
203 |
204 |
def delta(feat, N):
    """Compute delta features from a feature vector sequence.

    Each output row is a weighted difference of the ``N`` preceding and ``N``
    following frames; the sequence is edge-padded so border frames are defined.

    :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector.
    :param N: For each frame, calculate delta features based on preceding and following N frames
    :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector.
    :raises ValueError: if ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError('N must be an integer >= 1')

    num_frames = len(feat)
    span = 2 * N + 1
    weights = numpy.arange(-N, N + 1)
    denominator = 2 * sum(k * k for k in range(1, N + 1))

    padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge')  # padded version of feat
    delta_feat = numpy.empty_like(feat)
    for t in range(num_frames):
        # padded[t : t + span] is centred on original frame t
        neighbourhood = padded[t: t + span]
        delta_feat[t] = numpy.dot(weights, neighbourhood) / denominator
    return delta_feat
222 |
--------------------------------------------------------------------------------
/deepasr/pipeline/ctc_pipeline.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from typing import List
4 | import numpy as np
5 | import random
6 | import tensorflow as tf
7 | from tensorflow import keras
8 | import pandas as pd
9 | from concurrent.futures import ThreadPoolExecutor, wait
10 | # from tensorflow.keras.layers import *
11 | from tensorflow.keras.models import Model
12 | import sys
13 |
14 | sys.path.append("..")
15 | from deepasr.pipeline import Pipeline
16 | from deepasr.augmentation import Augmentation
17 | from deepasr.decoder import Decoder
18 | from deepasr.features import FeaturesExtractor
19 | from deepasr.vocab import Alphabet
20 | from deepasr.utils import read_audio, save_data
21 | from deepasr.model import compile_model
22 |
23 | logger = logging.getLogger('asr.pipeline')
24 |
25 |
class CTCPipeline(Pipeline):
    """
    The pipeline is responsible for connecting a neural network model with
    all non-differential transformations (features extraction or decoding),
    and dependencies. Components are independent.
    """

    def __init__(self,
                 alphabet: Alphabet,
                 features_extractor: FeaturesExtractor,
                 model: keras.Model,
                 optimizer: keras.optimizers.Optimizer,
                 decoder: Decoder,
                 sample_rate: int,
                 mono: bool,
                 label_len: int = 0,
                 multi_gpu: bool = True,
                 temp_model: keras.Model = None):
        """ Store the components and compile ``model`` with ``optimizer``.

        ``temp_model`` is the model exposed by the ``model`` property and written
        to ``network.h5`` by ``save``; it defaults to the compiled model.
        """
        self._alphabet = alphabet
        self._optimizer = optimizer
        self._decoder = decoder
        self._features_extractor = features_extractor
        self.sample_rate = sample_rate
        self.mono = mono
        self.label_len = label_len
        self.multi_gpu = multi_gpu
        self._model = self._compile_model(model, optimizer, multi_gpu)
        self.temp_model = temp_model if temp_model else self._model

    @property
    def alphabet(self) -> Alphabet:
        return self._alphabet

    @property
    def features_extractor(self) -> FeaturesExtractor:
        return self._features_extractor

    @property
    def model(self) -> keras.Model:
        return self.temp_model

    @property
    def decoder(self) -> Decoder:
        return self._decoder

    def preprocess(self,
                   data: List[np.ndarray],
                   is_extracted: bool,
                   augmentation: Augmentation) -> np.ndarray:
        """ Preprocess batch data to format understandable to a model.

        When ``is_extracted`` is true the data is only aligned, otherwise it is
        passed through the features extractor. ``augmentation`` (if given) is
        applied to the resulting features.
        """
        if is_extracted:  # then just align features
            features = FeaturesExtractor.align(data)
        else:
            features = self._features_extractor(data)
        if augmentation:
            features = augmentation(features)
        return features

    def fit_iter(self,
                 train_dataset: pd.DataFrame,
                 augmentation: Augmentation = None,
                 prepared_features: bool = False,
                 iter_num: int = 1000,
                 batch_size: int = 32,
                 epochs: int = 3,
                 checkpoint: str = None,
                 **kwargs) -> keras.callbacks.History:
        """ Train on ``iter_num`` randomly sampled batches.

        ``train_dataset`` must provide the columns ``path`` and ``transcripts``.
        Every 100th iteration (starting with the first) prints progress, fits
        verbosely and, when ``checkpoint`` is given, saves the pipeline there.
        Returns the history of the last ``fit`` call.
        """
        history = keras.callbacks.History()

        audios = train_dataset['path'].to_list()
        labels = self._alphabet.get_batch_labels(train_dataset['transcripts'].to_list())
        transcripts = train_dataset['transcripts'].to_list()
        train_len_ = len(transcripts)

        self.label_len = labels.shape[1]
        self._model.summary()

        for i in range(iter_num):
            # NOTE(review): the last 25 samples are never drawn and a dataset with
            # fewer than batch_size + 25 rows makes random.sample raise ValueError.
            # Kept as in the original; confirm whether this hold-out is intended.
            train_index = random.sample(range(train_len_ - 25), batch_size)

            x_train = [audios[j] for j in train_index]
            y_train = [labels[j] for j in train_index]
            y_trans = [transcripts[j] for j in train_index]

            train_inputs = self.wrap_preprocess(x_train,
                                                y_train,
                                                y_trans, augmentation, prepared_features)

            # Dummy targets under the key 'ctc', one zero per sample.
            outputs = {'ctc': np.zeros([batch_size])}

            report = i % 100 == 0
            if report:
                print("iter:", i)
                print("input features: ", train_inputs['the_input'].shape)
                print("input labels: ", train_inputs['the_labels'].shape)

            history = self._model.fit(train_inputs, outputs,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=1 if report else 0, **kwargs)

            if report and checkpoint:
                self.save(checkpoint)
                print("Pipeline Saved at", checkpoint)

        return history

    def fit(self,
            train_dataset: pd.DataFrame,
            augmentation: Augmentation = None,
            prepared_features: bool = False,
            batch_size: int = 32,
            epochs: int = 3,
            checkpoint: str = None,
            **kwargs) -> keras.callbacks.History:
        """ Extract features for the whole dataset at once and train the model.

        ``train_dataset`` must provide the columns ``path`` and ``transcripts``.
        ``checkpoint`` is accepted for signature parity with ``fit_iter`` but is
        not used here.
        """
        audios = train_dataset['path'].to_list()
        labels = self._alphabet.get_batch_labels(train_dataset['transcripts'].to_list())
        transcripts = train_dataset['transcripts'].to_list()

        self.label_len = labels.shape[1]
        self._model.summary()

        print("Feature Extraction in progress...")
        train_inputs = self.wrap_preprocess(audios,
                                            list(labels),
                                            transcripts, augmentation, prepared_features)

        # Dummy targets under the key 'ctc', one zero per sample.
        outputs = {'ctc': np.zeros([len(audios)])}
        print("Feature Extraction completed.")

        print("input features: ", train_inputs['the_input'].shape)
        print("input labels: ", train_inputs['the_labels'].shape)

        print("Model training initiated...")
        history = self._model.fit(train_inputs, outputs,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  verbose=1, **kwargs)

        return history

    def fit_generator(self, train_dataset: pd.DataFrame,
                      shuffle: bool = True,
                      augmentation: Augmentation = None,
                      prepared_features: bool = False,
                      batch_size: int = 32,
                      epochs: int = 3,
                      verbose: int = 1,
                      **kwargs) -> keras.callbacks.History:
        """ Train the model from a batch generator instead of pre-extracting all features.

        ``train_dataset`` must provide the columns ``path`` and ``transcripts``.
        ``steps_per_epoch`` is the number of complete batches in the dataset.
        """
        audios = train_dataset['path'].to_list()
        labels = self._alphabet.get_batch_labels(train_dataset['transcripts'].to_list())
        transcripts = train_dataset['transcripts'].to_list()
        train_len_ = len(transcripts)

        self.label_len = labels.shape[1]
        self._model.summary()

        train_gen = self.get_generator(audios, labels, transcripts,
                                       batch_size, shuffle, augmentation, prepared_features)

        return self._model.fit(train_gen, epochs=epochs,
                               steps_per_epoch=train_len_ // batch_size, verbose=verbose, **kwargs)

    def get_generator(self, audio_paths: List[str], texts: np.ndarray, transcripts: List[str], batch_size: int = 32,
                      shuffle: bool = True, augmentation: Augmentation = None,
                      prepared_features: bool = False):
        """ Return an endless generator of ``(inputs, targets)`` training batches.

        Each pass over the data yields every complete batch of ``batch_size``
        samples (a trailing partial batch is skipped). While one batch is being
        consumed the next one is prepared on a single background thread.

        With ``shuffle`` enabled, audio paths, labels and transcripts are permuted
        together so they stay aligned, using a fixed seed (123) so runs are
        reproducible while the order still changes from pass to pass.

        Raises ``ValueError`` when ``batch_size`` is not positive or the dataset
        holds fewer than ``batch_size`` samples.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        num_samples = len(audio_paths)
        num_batches = num_samples // batch_size
        if num_batches < 1:
            # Without this guard the generator would loop forever without yielding.
            raise ValueError("dataset has fewer samples than batch_size")

        def build_batch(sample_ids):
            """ Preprocess the samples selected by ``sample_ids`` into model inputs. """
            return self.wrap_preprocess([audio_paths[k] for k in sample_ids],
                                        [texts[k] for k in sample_ids],
                                        [transcripts[k] for k in sample_ids],
                                        augmentation, prepared_features)

        def generator():
            rng = random.Random(123)
            order = list(range(num_samples))
            batch_starts = range(0, num_batches * batch_size, batch_size)
            # One pool for the generator's lifetime; it is shut down when the
            # generator is closed or garbage collected.
            with ThreadPoolExecutor(1) as pool:  # Run a single I/O thread in parallel
                while True:
                    if shuffle:
                        rng.shuffle(order)
                    # Index slices are copied before submission, so the worker
                    # never observes a later reshuffle of ``order``.
                    future = pool.submit(build_batch, order[:batch_size])
                    for start in batch_starts[1:]:
                        batch = future.result()
                        future = pool.submit(build_batch, order[start:start + batch_size])
                        yield batch, {'ctc': np.zeros([batch_size])}
                    # The last prepared batch of the pass must be emitted as well.
                    yield future.result(), {'ctc': np.zeros([batch_size])}

        return generator()

    def wrap_preprocess(self, audios: List[str], the_labels: List[np.ndarray], transcripts: List[str],
                        augmentation: Augmentation = None,
                        prepared_features: bool = False):
        """ Build the dictionary of model inputs for a batch.

        Reads every audio file, turns the batch into features and returns a dict
        with the keys ``the_input``, ``the_labels``, ``input_length`` and
        ``label_length``. ``label_length`` holds the character count of each
        transcript as a column vector.
        """
        mid_features = [read_audio(audio, sample_rate=self.sample_rate, mono=self.mono) for audio in audios]
        the_input = self.preprocess(mid_features, prepared_features, augmentation)

        the_labels = np.array(the_labels)

        # length of each transcription, shaped (batch, 1)
        label_lengths = np.array([len(trans) for trans in transcripts]).reshape(-1, 1)

        # NOTE(review): every input length is set to the padded label width
        # (the_labels.shape[1]), not to the number of feature frames. This matches
        # the original behaviour; confirm it is what the CTC loss expects.
        input_lengths = np.full((the_labels.shape[0], 1), the_labels.shape[1], dtype=float)

        return {
            'the_input': the_input,
            'the_labels': the_labels,
            'input_length': np.asarray(input_lengths),
            'label_length': np.asarray(label_lengths)
        }

    def predict(self, audio: str, **kwargs) -> List[str]:
        """ Get ready features, and make a prediction.

        ``audio`` is the path of the file to transcribe; ``kwargs`` are forwarded
        to the Keras ``predict`` call.
        """
        # get audio features
        features = self.features_extractor.make_features(
            read_audio(audio, sample_rate=self.sample_rate, mono=self.mono))
        in_features = self.features_extractor.align([features], self.features_extractor.features_shape)

        # Inference only needs the sub-graph between the named input and output layers.
        pred_model = Model(inputs=self._model.get_layer('the_input').output,
                           outputs=self._model.get_layer('the_output').output)
        batch_logits = pred_model.predict(in_features, **kwargs)
        decoded_labels = self._decoder(batch_logits, self.label_len)
        predictions = self._alphabet.get_batch_transcripts(decoded_labels)
        return predictions

    def save(self, directory: str):
        """ Save each component of the CTC pipeline into ``directory``.

        The directory is created when it does not exist yet.
        """
        os.makedirs(directory, exist_ok=True)
        self.temp_model.save(os.path.join(directory, 'network.h5'))
        self._model.save_weights(os.path.join(directory, 'model_weights.h5'))
        save_data(self._optimizer, os.path.join(directory, 'optimizer.bin'))
        save_data(self._alphabet, os.path.join(directory, 'alphabet.bin'))
        save_data(self._decoder, os.path.join(directory, 'decoder.bin'))
        save_data(self.multi_gpu, os.path.join(directory, 'multi_gpu_flag.bin'))
        save_data(self.sample_rate, os.path.join(directory, 'sample_rate.bin'))
        save_data(self.mono, os.path.join(directory, 'mono.bin'))
        save_data(self.label_len, os.path.join(directory, 'label_len.bin'))
        save_data(self._features_extractor,
                  os.path.join(directory, 'feature_extractor.bin'))

    # def load(self, directory: str):
    #     """ Load each component of the CTC pipeline. """
    #     # model = keras.models.load_model(os.path.join(directory, 'model.h5'),
    #     #                                 custom_objects={'clipped_relu': cls.clipped_relu})
    #     self._model.load_weights(os.path.join(directory, 'model_weights.h5'))
    #     self._alphabet = load_data(os.path.join(directory, 'alphabet.bin'))
    #     self._decoder = load_data(os.path.join(directory, 'decoder.bin'))
    #     self._features_extractor = load_data(
    #         os.path.join(directory, 'feature_extractor.bin'))

    @staticmethod
    def _compile_model(model: keras.Model,
                       optimizer: keras.optimizers.Optimizer,
                       multi_gpu: bool) -> keras.Model:
        """ Compile the model, inside a distribution strategy scope when ``multi_gpu`` is set.

        If creating or entering the strategy raises ``ValueError`` the model is
        compiled without a strategy instead.
        """
        if not multi_gpu:
            dist_model = compile_model(model, optimizer)
            logger.info("Training using single GPU or CPU")
        else:
            try:
                strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
                with strategy.scope():
                    dist_model = compile_model(model, optimizer)
                logger.info("Training using multiple GPUs")
            except ValueError:
                dist_model = compile_model(model, optimizer)
                logger.info("Training using single GPU or CPU")
        return dist_model
334 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | DeepAsr is an open-source implementation of
633 | end-to-end Automatic Speech Recognition (ASR) engine.
634 | Copyright (C) 2020 Sai Kumar Yava
635 |
636 | This program is free software: you can redistribute it and/or modify
637 | it under the terms of the GNU Affero General Public License as published
638 | by the Free Software Foundation, either version 3 of the License, or
639 | (at your option) any later version.
640 |
641 | This program is distributed in the hope that it will be useful,
642 | but WITHOUT ANY WARRANTY; without even the implied warranty of
643 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
644 | GNU Affero General Public License for more details.
645 |
646 | You should have received a copy of the GNU Affero General Public License
647 | along with this program. If not, see <https://www.gnu.org/licenses/>.
648 |
649 | Also add information on how to contact you by electronic and paper mail.
650 |
651 | If your software can interact with users remotely through a computer
652 | network, you should also make sure that it provides a way for users to
653 | get its source. For example, if your program is a web application, its
654 | interface could display a "Source" link that leads users to an archive
655 | of the code. There are many ways you could offer source, and different
656 | solutions will be better for different programs; see section 13 for the
657 | specific requirements.
658 |
659 | You should also get your employer (if you work as a programmer) or school,
660 | if any, to sign a "copyright disclaimer" for the program, if necessary.
661 | For more information on this, and how to apply and follow the GNU AGPL, see
662 | <https://www.gnu.org/licenses/>.
663 |
--------------------------------------------------------------------------------
/DeepAsr_CTC_Pipeline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# DeepAsr (DeepAsrNetwork1)"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "colab": {},
15 | "colab_type": "code",
16 | "id": "S0FiiN9Y0FEs"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "# !wget http://www.openslr.org/resources/12/train-clean-100.tar.gz"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "colab": {},
28 | "colab_type": "code",
29 | "id": "5JJMHx460FE4"
30 | },
31 | "outputs": [],
32 | "source": [
33 | "# !tar xzvf train-clean-100.tar.gz"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "colab": {},
41 | "colab_type": "code",
42 | "id": "SHLb6nDsUwkN"
43 | },
44 | "outputs": [],
45 | "source": [
46 | "# ! pip install tensorflow==2.1.0"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "colab_type": "text",
53 | "id": "Vx9UdVs5384B"
54 | },
55 | "source": [
56 | "# 1. Prepare DataSet"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {
63 | "colab": {},
64 | "colab_type": "code",
65 | "id": "kw_18d180FFM"
66 | },
67 | "outputs": [],
68 | "source": [
69 | "import os\n",
70 | "import numpy as np\n",
71 | "import pandas as pd\n",
72 | "import tensorflow as tf\n",
73 | "import deepasr as asr\n",
74 | "import librosa"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 5,
80 | "metadata": {
81 | "colab": {
82 | "base_uri": "https://localhost:8080/",
83 | "height": 34
84 | },
85 | "colab_type": "code",
86 | "id": "d29QeHTJVNOF",
87 | "outputId": "b79a2fa5-e783-4543-d5b1-125fb3a1bd92"
88 | },
89 | "outputs": [
90 | {
91 | "data": {
92 | "text/plain": [
93 | "'2.1.0'"
94 | ]
95 | },
96 | "execution_count": 5,
97 | "metadata": {
98 | "tags": []
99 | },
100 | "output_type": "execute_result"
101 | }
102 | ],
103 | "source": [
104 | "tf.__version__"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 6,
110 | "metadata": {
111 | "colab": {
112 | "base_uri": "https://localhost:8080/",
113 | "height": 34
114 | },
115 | "colab_type": "code",
116 | "id": "zQEGr0HfC5OF",
117 | "outputId": "88d851cc-ec17-42d8-c8a2-26cfda9506cb"
118 | },
119 | "outputs": [
120 | {
121 | "data": {
122 | "text/plain": [
123 | "'0.0.9'"
124 | ]
125 | },
126 | "execution_count": 6,
127 | "metadata": {
128 | "tags": []
129 | },
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "asr.__version__"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "colab": {},
142 | "colab_type": "code",
143 | "id": "NmOf6DzG0FFS"
144 | },
145 | "outputs": [],
146 | "source": [
147 | "# get audios and transcripts\n",
148 | "org_path = './LibriSpeech/train-clean-100/'\n",
149 | "count = 0\n",
150 | "inp = []\n",
151 | "k=0\n",
152 | "audio_name = []\n",
153 | "audio_trans = []\n",
154 | "for dir1 in os.listdir(org_path):\n",
155 | " dir2_path = org_path+dir1+'/'\n",
156 | " #print(dir2_path)\n",
157 | " for dir2 in os.listdir(dir2_path):\n",
158 | " dir3_path = dir2_path+dir2+'/'\n",
159 | " \n",
160 | " for audio in os.listdir(dir3_path):\n",
161 | " if audio.endswith('.txt'):\n",
162 | " k+=1\n",
163 | " file_path = dir3_path + audio\n",
164 | " with open(file_path) as f:\n",
165 | " line = f.readlines()\n",
166 | " for lines in line:\n",
167 | " flac_path = dir3_path+lines.split()[0]+'.flac'\n",
168 | " \n",
169 | " audio_name.append(flac_path)\n",
170 | "\n",
171 | " # print(cmd)\n",
172 | " words2 = lines.split()[1:]\n",
173 | " words4=' '.join(words2)\n",
174 | " audio_trans.append(words4)"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "colab": {},
182 | "colab_type": "code",
183 | "id": "5E9POoGc0FFb"
184 | },
185 | "outputs": [],
186 | "source": [
187 | "# create dataset\n",
188 | "df = pd.DataFrame({\"path\":audio_name,\"transcripts\":audio_trans})"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 9,
194 | "metadata": {
195 | "colab": {
196 | "base_uri": "https://localhost:8080/",
197 | "height": 34
198 | },
199 | "colab_type": "code",
200 | "id": "TRuPCDxZrhJu",
201 | "outputId": "64b3dc3f-ec85-4fc4-cf4e-60204b6f719e"
202 | },
203 | "outputs": [
204 | {
205 | "data": {
206 | "text/plain": [
207 | "(28539, 2)"
208 | ]
209 | },
210 | "execution_count": 9,
211 | "metadata": {
212 | "tags": []
213 | },
214 | "output_type": "execute_result"
215 | }
216 | ],
217 | "source": [
218 | "df.shape"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {
225 | "colab": {},
226 | "colab_type": "code",
227 | "id": "g4bePqQvri5Q"
228 | },
229 | "outputs": [],
230 | "source": [
231 | "# filter transcript less than 100 characters\n",
232 | "train_data = df[df['transcripts'].str.len() < 100]\n",
233 | "# train_df = df.sample(n = 3000) "
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 11,
239 | "metadata": {
240 | "colab": {
241 | "base_uri": "https://localhost:8080/",
242 | "height": 34
243 | },
244 | "colab_type": "code",
245 | "id": "fiM94FU3rkh7",
246 | "outputId": "9c60db2a-8b85-47e9-a294-5e46d7c2c41e"
247 | },
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/plain": [
252 | "(3194, 2)"
253 | ]
254 | },
255 | "execution_count": 11,
256 | "metadata": {
257 | "tags": []
258 | },
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "train_data.shape"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {
269 | "colab_type": "text",
270 | "id": "EMDC5MYk4AyL"
271 | },
272 | "source": [
273 | "# 2. Prepare DeepAsr CTC Pipeline"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "metadata": {
280 | "colab": {},
281 | "colab_type": "code",
282 | "id": "-youl7Mo0FFi"
283 | },
284 | "outputs": [],
285 | "source": [
286 | "# get CTCPipeline\n",
287 | "def get_config(feature_type: str = 'spectrogram', multi_gpu: bool = False):\n",
288 | " # audio feature extractor\n",
289 | " features_extractor = asr.features.preprocess(feature_type=feature_type, features_num=161,\n",
290 | " samplerate=16000,\n",
291 | " winlen=0.02,\n",
292 | " winstep=0.025,\n",
293 | " winfunc=np.hanning)\n",
294 | " \n",
295 | " # input label encoder\n",
296 | " alphabet_en = asr.vocab.Alphabet(lang='en')\n",
297 | " # training model\n",
298 | " model = asr.model.get_deepasrnetwork1(\n",
299 | " input_dim=161,\n",
300 | " output_dim=29,\n",
301 | " is_mixed_precision=True\n",
302 | " )\n",
303 | " # model optimizer\n",
304 | " optimizer = tf.keras.optimizers.Adam(\n",
305 | " lr=1e-4,\n",
306 | " beta_1=0.9,\n",
307 | " beta_2=0.999,\n",
308 | " epsilon=1e-8\n",
309 | " )\n",
310 | " # output label decoder\n",
311 | " decoder = asr.decoder.GreedyDecoder()\n",
312 | " # CTCPipeline\n",
313 | " pipeline = asr.pipeline.ctc_pipeline.CTCPipeline(\n",
314 | " alphabet=alphabet_en, features_extractor=features_extractor, model=model, optimizer=optimizer, decoder=decoder,\n",
315 | " sample_rate=16000, mono=True, multi_gpu=multi_gpu\n",
316 | " )\n",
317 | " return pipeline"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": null,
323 | "metadata": {
324 | "colab": {},
325 | "colab_type": "code",
326 | "id": "MqdfySzuRtk5"
327 | },
328 | "outputs": [],
329 | "source": [
330 | "# CTCPipeline for asr\n",
331 | "pipeline = get_config(feature_type = 'fbank', multi_gpu=False)"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {
337 | "colab_type": "text",
338 | "id": "WTG8iEwS4NKU"
339 | },
340 | "source": [
341 | "# 3. Model training"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 21,
347 | "metadata": {
348 | "colab": {
349 | "base_uri": "https://localhost:8080/",
350 | "height": 1000
351 | },
352 | "colab_type": "code",
353 | "id": "6QMxCI8T0qMK",
354 | "outputId": "a598acfb-1cce-41db-db3b-b2d5cdc062d9"
355 | },
356 | "outputs": [
357 | {
358 | "name": "stdout",
359 | "output_type": "stream",
360 | "text": [
361 | "Model: \"DeepAsr\"\n",
362 | "__________________________________________________________________________________________________\n",
363 | "Layer (type) Output Shape Param # Connected to \n",
364 | "==================================================================================================\n",
365 | "the_input (InputLayer) [(None, None, 161)] 0 \n",
366 | "__________________________________________________________________________________________________\n",
367 | "BN_1 (BatchNormalization) (None, None, 161) 644 the_input[0][0] \n",
368 | "__________________________________________________________________________________________________\n",
369 | "Conv1D_1 (Conv1D) (None, None, 220) 177320 BN_1[0][0] \n",
370 | "__________________________________________________________________________________________________\n",
371 | "CNBN_1 (BatchNormalization) (None, None, 220) 880 Conv1D_1[0][0] \n",
372 | "__________________________________________________________________________________________________\n",
373 | "Conv1D_2 (Conv1D) (None, None, 220) 242220 CNBN_1[0][0] \n",
374 | "__________________________________________________________________________________________________\n",
375 | "CNBN_2 (BatchNormalization) (None, None, 220) 880 Conv1D_2[0][0] \n",
376 | "__________________________________________________________________________________________________\n",
377 | "gru_1 (GRU) (None, None, 512) 1127424 CNBN_2[0][0] \n",
378 | "__________________________________________________________________________________________________\n",
379 | "gru_2 (GRU) (None, None, 512) 1127424 CNBN_2[0][0] \n",
380 | "__________________________________________________________________________________________________\n",
381 | "concatenate (Concatenate) (None, None, 1024) 0 gru_1[0][0] \n",
382 | " gru_2[0][0] \n",
383 | "__________________________________________________________________________________________________\n",
384 | "BN_2 (BatchNormalization) (None, None, 1024) 4096 concatenate[0][0] \n",
385 | "__________________________________________________________________________________________________\n",
386 | "time_distributed (TimeDistribut (None, None, 30) 30750 BN_2[0][0] \n",
387 | "__________________________________________________________________________________________________\n",
388 | "the_output (TimeDistributed) (None, None, 29) 899 time_distributed[0][0] \n",
389 | "__________________________________________________________________________________________________\n",
390 | "the_labels (InputLayer) [(None, None)] 0 \n",
391 | "__________________________________________________________________________________________________\n",
392 | "input_length (InputLayer) [(None, 1)] 0 \n",
393 | "__________________________________________________________________________________________________\n",
394 | "label_length (InputLayer) [(None, 1)] 0 \n",
395 | "__________________________________________________________________________________________________\n",
396 | "ctc (Lambda) (None, 1) 0 the_output[0][0] \n",
397 | " the_labels[0][0] \n",
398 | " input_length[0][0] \n",
399 | " label_length[0][0] \n",
400 | "==================================================================================================\n",
401 | "Total params: 2,712,537\n",
402 | "Trainable params: 2,709,287\n",
403 | "Non-trainable params: 3,250\n",
404 | "__________________________________________________________________________________________________\n",
405 | "Feature Extraction in progress...\n",
406 | "Feature Extraction completed.\n",
407 | "input features: (3194, 593, 161)\n",
408 | "input labels: (3194, 99)\n",
409 | "Model training initiated...\n",
410 | "Train on 3194 samples\n",
411 | "Epoch 1/500\n",
412 | "3194/3194 [==============================] - 48s 15ms/sample - loss: inf - accuracy: 0.0000e+00\n",
413 | "Epoch 2/500\n",
414 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
415 | "Epoch 3/500\n",
416 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
417 | "Epoch 4/500\n",
418 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
419 | "Epoch 5/500\n",
420 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
421 | "Epoch 6/500\n",
422 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
423 | "Epoch 7/500\n",
424 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
425 | "Epoch 8/500\n",
426 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
427 | "Epoch 9/500\n",
428 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
429 | "Epoch 10/500\n",
430 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
431 | "Epoch 11/500\n",
432 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
433 | "Epoch 12/500\n",
434 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
435 | "Epoch 13/500\n",
436 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
437 | "Epoch 14/500\n",
438 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
439 | "Epoch 15/500\n",
440 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
441 | "Epoch 16/500\n",
442 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
443 | "Epoch 17/500\n",
444 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
445 | "Epoch 18/500\n",
446 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
447 | "Epoch 19/500\n",
448 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
449 | "Epoch 20/500\n",
450 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
451 | "Epoch 21/500\n",
452 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
453 | "Epoch 22/500\n",
454 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
455 | "Epoch 23/500\n",
456 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
457 | "Epoch 24/500\n",
458 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
459 | "Epoch 25/500\n",
460 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
461 | "Epoch 26/500\n",
462 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
463 | "Epoch 27/500\n",
464 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
465 | "Epoch 28/500\n",
466 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
467 | "Epoch 29/500\n",
468 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
469 | "Epoch 30/500\n",
470 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
471 | "Epoch 31/500\n",
472 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
473 | "Epoch 32/500\n",
474 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
475 | "Epoch 33/500\n",
476 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
477 | "Epoch 34/500\n",
478 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
479 | "Epoch 35/500\n",
480 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
481 | "Epoch 36/500\n",
482 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
483 | "Epoch 37/500\n",
484 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
485 | "Epoch 38/500\n",
486 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
487 | "Epoch 39/500\n",
488 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
489 | "Epoch 40/500\n",
490 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
491 | "Epoch 41/500\n",
492 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
493 | "Epoch 42/500\n",
494 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
495 | "Epoch 43/500\n",
496 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
497 | "Epoch 44/500\n",
498 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
499 | "Epoch 45/500\n",
500 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
501 | "Epoch 46/500\n",
502 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
503 | "Epoch 47/500\n",
504 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
505 | "Epoch 48/500\n",
506 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
507 | "Epoch 49/500\n",
508 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
509 | "Epoch 50/500\n",
510 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
511 | "Epoch 51/500\n",
512 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
513 | "Epoch 52/500\n",
514 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
515 | "Epoch 53/500\n",
516 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
517 | "Epoch 54/500\n",
518 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
519 | "Epoch 55/500\n",
520 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
521 | "Epoch 56/500\n",
522 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
523 | "Epoch 57/500\n",
524 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
525 | "Epoch 58/500\n",
526 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
527 | "Epoch 59/500\n",
528 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
529 | "Epoch 60/500\n",
530 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
531 | "Epoch 61/500\n",
532 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
533 | "Epoch 62/500\n",
534 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
535 | "Epoch 63/500\n",
536 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
537 | "Epoch 64/500\n",
538 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
539 | "Epoch 65/500\n",
540 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
541 | "Epoch 66/500\n",
542 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
543 | "Epoch 67/500\n",
544 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
545 | "Epoch 68/500\n",
546 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
547 | "Epoch 69/500\n",
548 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
549 | "Epoch 70/500\n",
550 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
551 | "Epoch 71/500\n",
552 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
553 | "Epoch 72/500\n",
554 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
555 | "Epoch 73/500\n",
556 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
557 | "Epoch 74/500\n",
558 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
559 | "Epoch 75/500\n",
560 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
561 | "Epoch 76/500\n",
562 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
563 | "Epoch 77/500\n",
564 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
565 | "Epoch 78/500\n",
566 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
567 | "Epoch 79/500\n",
568 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
569 | "Epoch 80/500\n",
570 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
571 | "Epoch 81/500\n",
572 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
573 | "Epoch 82/500\n",
574 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
575 | "Epoch 83/500\n",
576 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
577 | "Epoch 84/500\n",
578 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
579 | "Epoch 85/500\n",
580 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
581 | "Epoch 86/500\n",
582 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
583 | "Epoch 87/500\n",
584 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
585 | "Epoch 88/500\n",
586 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
587 | "Epoch 89/500\n",
588 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
589 | "Epoch 90/500\n",
590 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
591 | "Epoch 91/500\n",
592 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
593 | "Epoch 92/500\n",
594 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
595 | "Epoch 93/500\n",
596 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
597 | "Epoch 94/500\n",
598 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
599 | "Epoch 95/500\n",
600 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
601 | "Epoch 96/500\n",
602 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
603 | "Epoch 97/500\n",
604 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
605 | "Epoch 98/500\n",
606 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
607 | "Epoch 99/500\n",
608 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
609 | "Epoch 100/500\n",
610 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
611 | "Epoch 101/500\n",
612 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
613 | "Epoch 102/500\n",
614 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
615 | "Epoch 103/500\n",
616 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
617 | "Epoch 104/500\n",
618 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
619 | "Epoch 105/500\n",
620 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
621 | "Epoch 106/500\n",
622 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
623 | "Epoch 107/500\n",
624 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
625 | "Epoch 108/500\n",
626 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
627 | "Epoch 109/500\n",
628 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
629 | "Epoch 110/500\n",
630 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
631 | "Epoch 111/500\n",
632 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
633 | "Epoch 112/500\n",
634 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
635 | "Epoch 113/500\n",
636 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
637 | "Epoch 114/500\n",
638 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
639 | "Epoch 115/500\n",
640 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
641 | "Epoch 116/500\n",
642 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
643 | "Epoch 117/500\n",
644 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
645 | "Epoch 118/500\n",
646 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
647 | "Epoch 119/500\n",
648 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
649 | "Epoch 120/500\n",
650 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
651 | "Epoch 121/500\n",
652 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
653 | "Epoch 122/500\n",
654 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
655 | "Epoch 123/500\n",
656 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
657 | "Epoch 124/500\n",
658 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
659 | "Epoch 125/500\n",
660 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
661 | "Epoch 126/500\n",
662 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
663 | "Epoch 127/500\n",
664 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
665 | "Epoch 128/500\n",
666 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
667 | "Epoch 129/500\n",
668 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
669 | "Epoch 130/500\n",
670 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
671 | "Epoch 131/500\n",
672 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
673 | "Epoch 132/500\n",
674 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
675 | "Epoch 133/500\n",
676 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
677 | "Epoch 134/500\n",
678 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
679 | "Epoch 135/500\n",
680 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
681 | "Epoch 136/500\n",
682 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
683 | "Epoch 137/500\n",
684 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
685 | "Epoch 138/500\n",
686 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
687 | "Epoch 139/500\n",
688 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
689 | "Epoch 140/500\n",
690 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
691 | "Epoch 141/500\n",
692 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
693 | "Epoch 142/500\n",
694 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
695 | "Epoch 143/500\n",
696 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
697 | "Epoch 144/500\n",
698 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
699 | "Epoch 145/500\n",
700 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
701 | "Epoch 146/500\n",
702 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
703 | "Epoch 147/500\n",
704 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
705 | "Epoch 148/500\n",
706 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
707 | "Epoch 149/500\n",
708 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
709 | "Epoch 150/500\n",
710 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
711 | "Epoch 151/500\n",
712 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
713 | "Epoch 152/500\n",
714 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
715 | "Epoch 153/500\n",
716 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
717 | "Epoch 154/500\n",
718 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
719 | "Epoch 155/500\n",
720 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
721 | "Epoch 156/500\n",
722 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
723 | "Epoch 157/500\n",
724 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
725 | "Epoch 158/500\n",
726 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
727 | "Epoch 159/500\n",
728 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
729 | "Epoch 160/500\n",
730 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
731 | "Epoch 161/500\n",
732 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
733 | "Epoch 162/500\n",
734 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
735 | "Epoch 163/500\n",
736 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
737 | "Epoch 164/500\n",
738 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
739 | "Epoch 165/500\n",
740 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
741 | "Epoch 166/500\n",
742 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
743 | "Epoch 167/500\n",
744 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
745 | "Epoch 168/500\n",
746 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
747 | "Epoch 169/500\n",
748 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
749 | "Epoch 170/500\n",
750 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
751 | "Epoch 171/500\n",
752 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
753 | "Epoch 172/500\n",
754 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
755 | "Epoch 173/500\n",
756 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
757 | "Epoch 174/500\n",
758 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
759 | "Epoch 175/500\n",
760 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
761 | "Epoch 176/500\n",
762 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
763 | "Epoch 177/500\n",
764 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
765 | "Epoch 178/500\n",
766 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
767 | "Epoch 179/500\n",
768 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
769 | "Epoch 180/500\n",
770 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
771 | "Epoch 181/500\n",
772 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
773 | "Epoch 182/500\n",
774 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
775 | "Epoch 183/500\n",
776 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
777 | "Epoch 184/500\n",
778 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
779 | "Epoch 185/500\n",
780 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
781 | "Epoch 186/500\n",
782 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
783 | "Epoch 187/500\n",
784 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
785 | "Epoch 188/500\n",
786 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
787 | "Epoch 189/500\n",
788 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
789 | "Epoch 190/500\n",
790 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
791 | "Epoch 191/500\n",
792 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
793 | "Epoch 192/500\n",
794 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
795 | "Epoch 193/500\n",
796 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
797 | "Epoch 194/500\n",
798 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
799 | "Epoch 195/500\n",
800 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
801 | "Epoch 196/500\n",
802 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
803 | "Epoch 197/500\n",
804 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
805 | "Epoch 198/500\n",
806 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
807 | "Epoch 199/500\n",
808 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
809 | "Epoch 200/500\n",
810 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
811 | "Epoch 201/500\n",
812 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
813 | "Epoch 202/500\n",
814 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
815 | "Epoch 203/500\n",
816 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
817 | "Epoch 204/500\n",
818 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
819 | "Epoch 205/500\n",
820 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
821 | "Epoch 206/500\n",
822 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
823 | "Epoch 207/500\n",
824 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
825 | "Epoch 208/500\n",
826 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
827 | "Epoch 209/500\n",
828 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
829 | "Epoch 210/500\n",
830 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
831 | "Epoch 211/500\n",
832 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
833 | "Epoch 212/500\n",
834 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
835 | "Epoch 213/500\n",
836 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
837 | "Epoch 214/500\n",
838 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
839 | "Epoch 215/500\n",
840 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
841 | "Epoch 216/500\n",
842 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
843 | "Epoch 217/500\n",
844 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
845 | "Epoch 218/500\n",
846 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
847 | "Epoch 219/500\n",
848 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
849 | "Epoch 220/500\n",
850 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
851 | "Epoch 221/500\n",
852 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
853 | "Epoch 222/500\n",
854 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
855 | "Epoch 223/500\n",
856 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
857 | "Epoch 224/500\n",
858 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
859 | "Epoch 225/500\n",
860 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
861 | "Epoch 226/500\n",
862 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
863 | "Epoch 227/500\n",
864 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
865 | "Epoch 228/500\n",
866 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
867 | "Epoch 229/500\n",
868 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
869 | "Epoch 230/500\n",
870 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
871 | "Epoch 231/500\n",
872 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
873 | "Epoch 232/500\n",
874 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
875 | "Epoch 233/500\n",
876 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
877 | "Epoch 234/500\n",
878 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
879 | "Epoch 235/500\n",
880 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
881 | "Epoch 236/500\n",
882 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
883 | "Epoch 237/500\n",
884 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
885 | "Epoch 238/500\n",
886 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
887 | "Epoch 239/500\n",
888 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
889 | "Epoch 240/500\n",
890 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
891 | "Epoch 241/500\n",
892 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
893 | "Epoch 242/500\n",
894 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
895 | "Epoch 243/500\n",
896 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
897 | "Epoch 244/500\n",
898 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
899 | "Epoch 245/500\n",
900 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
901 | "Epoch 246/500\n",
902 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
903 | "Epoch 247/500\n",
904 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
905 | "Epoch 248/500\n",
906 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
907 | "Epoch 249/500\n",
908 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
909 | "Epoch 250/500\n",
910 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
911 | "Epoch 251/500\n",
912 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
913 | "Epoch 252/500\n",
914 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
915 | "Epoch 253/500\n",
916 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
917 | "Epoch 254/500\n",
918 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
919 | "Epoch 255/500\n",
920 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
921 | "Epoch 256/500\n",
922 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
923 | "Epoch 257/500\n",
924 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
925 | "Epoch 258/500\n",
926 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
927 | "Epoch 259/500\n",
928 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
929 | "Epoch 260/500\n",
930 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
931 | "Epoch 261/500\n",
932 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
933 | "Epoch 262/500\n",
934 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
935 | "Epoch 263/500\n",
936 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
937 | "Epoch 264/500\n",
938 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
939 | "Epoch 265/500\n",
940 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
941 | "Epoch 266/500\n",
942 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
943 | "Epoch 267/500\n",
944 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
945 | "Epoch 268/500\n",
946 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
947 | "Epoch 269/500\n",
948 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
949 | "Epoch 270/500\n",
950 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
951 | "Epoch 271/500\n",
952 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
953 | "Epoch 272/500\n",
954 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
955 | "Epoch 273/500\n",
956 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
957 | "Epoch 274/500\n",
958 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
959 | "Epoch 275/500\n",
960 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
961 | "Epoch 276/500\n",
962 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
963 | "Epoch 277/500\n",
964 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
965 | "Epoch 278/500\n",
966 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
967 | "Epoch 279/500\n",
968 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
969 | "Epoch 280/500\n",
970 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
971 | "Epoch 281/500\n",
972 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
973 | "Epoch 282/500\n",
974 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
975 | "Epoch 283/500\n",
976 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
977 | "Epoch 284/500\n",
978 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
979 | "Epoch 285/500\n",
980 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
981 | "Epoch 286/500\n",
982 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
983 | "Epoch 287/500\n",
984 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
985 | "Epoch 288/500\n",
986 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
987 | "Epoch 289/500\n",
988 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
989 | "Epoch 290/500\n",
990 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
991 | "Epoch 291/500\n",
992 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
993 | "Epoch 292/500\n",
994 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
995 | "Epoch 293/500\n",
996 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
997 | "Epoch 294/500\n",
998 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
999 | "Epoch 295/500\n",
1000 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1001 | "Epoch 296/500\n",
1002 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1003 | "Epoch 297/500\n",
1004 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1005 | "Epoch 298/500\n",
1006 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1007 | "Epoch 299/500\n",
1008 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1009 | "Epoch 300/500\n",
1010 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1011 | "Epoch 301/500\n",
1012 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1013 | "Epoch 302/500\n",
1014 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1015 | "Epoch 303/500\n",
1016 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1017 | "Epoch 304/500\n",
1018 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1019 | "Epoch 305/500\n",
1020 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1021 | "Epoch 306/500\n",
1022 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1023 | "Epoch 307/500\n",
1024 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1025 | "Epoch 308/500\n",
1026 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1027 | "Epoch 309/500\n",
1028 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1029 | "Epoch 310/500\n",
1030 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1031 | "Epoch 311/500\n",
1032 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1033 | "Epoch 312/500\n",
1034 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1035 | "Epoch 313/500\n",
1036 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1037 | "Epoch 314/500\n",
1038 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1039 | "Epoch 315/500\n",
1040 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1041 | "Epoch 316/500\n",
1042 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1043 | "Epoch 317/500\n",
1044 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1045 | "Epoch 318/500\n",
1046 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1047 | "Epoch 319/500\n",
1048 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1049 | "Epoch 320/500\n",
1050 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1051 | "Epoch 321/500\n",
1052 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1053 | "Epoch 322/500\n",
1054 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1055 | "Epoch 323/500\n",
1056 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1057 | "Epoch 324/500\n",
1058 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1059 | "Epoch 325/500\n",
1060 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1061 | "Epoch 326/500\n",
1062 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1063 | "Epoch 327/500\n",
1064 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1065 | "Epoch 328/500\n",
1066 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1067 | "Epoch 329/500\n",
1068 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1069 | "Epoch 330/500\n",
1070 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1071 | "Epoch 331/500\n",
1072 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1073 | "Epoch 332/500\n",
1074 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1075 | "Epoch 333/500\n",
1076 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1077 | "Epoch 334/500\n",
1078 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1079 | "Epoch 335/500\n",
1080 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1081 | "Epoch 336/500\n",
1082 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1083 | "Epoch 337/500\n",
1084 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1085 | "Epoch 338/500\n",
1086 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1087 | "Epoch 339/500\n",
1088 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1089 | "Epoch 340/500\n",
1090 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1091 | "Epoch 341/500\n",
1092 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1093 | "Epoch 342/500\n",
1094 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1095 | "Epoch 343/500\n",
1096 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1097 | "Epoch 344/500\n",
1098 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1099 | "Epoch 345/500\n",
1100 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1101 | "Epoch 346/500\n",
1102 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1103 | "Epoch 347/500\n",
1104 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1105 | "Epoch 348/500\n",
1106 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1107 | "Epoch 349/500\n",
1108 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1109 | "Epoch 350/500\n",
1110 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1111 | "Epoch 351/500\n",
1112 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1113 | "Epoch 352/500\n",
1114 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1115 | "Epoch 353/500\n",
1116 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1117 | "Epoch 354/500\n",
1118 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1119 | "Epoch 355/500\n",
1120 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1121 | "Epoch 356/500\n",
1122 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1123 | "Epoch 357/500\n",
1124 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1125 | "Epoch 358/500\n",
1126 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1127 | "Epoch 359/500\n",
1128 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1129 | "Epoch 360/500\n",
1130 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1131 | "Epoch 361/500\n",
1132 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1133 | "Epoch 362/500\n",
1134 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1135 | "Epoch 363/500\n",
1136 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1137 | "Epoch 364/500\n",
1138 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1139 | "Epoch 365/500\n",
1140 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1141 | "Epoch 366/500\n",
1142 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1143 | "Epoch 367/500\n",
1144 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1145 | "Epoch 368/500\n",
1146 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1147 | "Epoch 369/500\n",
1148 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1149 | "Epoch 370/500\n",
1150 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1151 | "Epoch 371/500\n",
1152 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1153 | "Epoch 372/500\n",
1154 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1155 | "Epoch 373/500\n",
1156 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1157 | "Epoch 374/500\n",
1158 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1159 | "Epoch 375/500\n",
1160 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1161 | "Epoch 376/500\n",
1162 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1163 | "Epoch 377/500\n",
1164 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1165 | "Epoch 378/500\n",
1166 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1167 | "Epoch 379/500\n",
1168 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1169 | "Epoch 380/500\n",
1170 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1171 | "Epoch 381/500\n",
1172 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1173 | "Epoch 382/500\n",
1174 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1175 | "Epoch 383/500\n",
1176 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1177 | "Epoch 384/500\n",
1178 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1179 | "Epoch 385/500\n",
1180 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1181 | "Epoch 386/500\n",
1182 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1183 | "Epoch 387/500\n",
1184 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1185 | "Epoch 388/500\n",
1186 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1187 | "Epoch 389/500\n",
1188 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1189 | "Epoch 390/500\n",
1190 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1191 | "Epoch 391/500\n",
1192 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1193 | "Epoch 392/500\n",
1194 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1195 | "Epoch 393/500\n",
1196 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1197 | "Epoch 394/500\n",
1198 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1199 | "Epoch 395/500\n",
1200 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1201 | "Epoch 396/500\n",
1202 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1203 | "Epoch 397/500\n",
1204 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1205 | "Epoch 398/500\n",
1206 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1207 | "Epoch 399/500\n",
1208 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1209 | "Epoch 400/500\n",
1210 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1211 | "Epoch 401/500\n",
1212 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1213 | "Epoch 402/500\n",
1214 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1215 | "Epoch 403/500\n",
1216 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1217 | "Epoch 404/500\n",
1218 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1219 | "Epoch 405/500\n",
1220 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1221 | "Epoch 406/500\n",
1222 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1223 | "Epoch 407/500\n",
1224 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1225 | "Epoch 408/500\n",
1226 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1227 | "Epoch 409/500\n",
1228 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1229 | "Epoch 410/500\n",
1230 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1231 | "Epoch 411/500\n",
1232 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1233 | "Epoch 412/500\n",
1234 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1235 | "Epoch 413/500\n",
1236 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1237 | "Epoch 414/500\n",
1238 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1239 | "Epoch 415/500\n",
1240 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1241 | "Epoch 416/500\n",
1242 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1243 | "Epoch 417/500\n",
1244 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1245 | "Epoch 418/500\n",
1246 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1247 | "Epoch 419/500\n",
1248 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1249 | "Epoch 420/500\n",
1250 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1251 | "Epoch 421/500\n",
1252 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1253 | "Epoch 422/500\n",
1254 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1255 | "Epoch 423/500\n",
1256 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1257 | "Epoch 424/500\n",
1258 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1259 | "Epoch 425/500\n",
1260 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1261 | "Epoch 426/500\n",
1262 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1263 | "Epoch 427/500\n",
1264 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1265 | "Epoch 428/500\n",
1266 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1267 | "Epoch 429/500\n",
1268 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1269 | "Epoch 430/500\n",
1270 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1271 | "Epoch 431/500\n",
1272 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0000e+00\n",
1273 | "Epoch 432/500\n",
1274 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1275 | "Epoch 433/500\n",
1276 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1277 | "Epoch 434/500\n",
1278 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1279 | "Epoch 435/500\n",
1280 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1281 | "Epoch 436/500\n",
1282 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1283 | "Epoch 437/500\n",
1284 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1285 | "Epoch 438/500\n",
1286 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1287 | "Epoch 439/500\n",
1288 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1289 | "Epoch 440/500\n",
1290 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1291 | "Epoch 441/500\n",
1292 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 3.1309e-04\n",
1293 | "Epoch 442/500\n",
1294 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 6.2617e-04\n",
1295 | "Epoch 443/500\n",
1296 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0016\n",
1297 | "Epoch 444/500\n",
1298 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1299 | "Epoch 445/500\n",
1300 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1301 | "Epoch 446/500\n",
1302 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1303 | "Epoch 447/500\n",
1304 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1305 | "Epoch 448/500\n",
1306 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0016\n",
1307 | "Epoch 449/500\n",
1308 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1309 | "Epoch 450/500\n",
1310 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0019\n",
1311 | "Epoch 451/500\n",
1312 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1313 | "Epoch 452/500\n",
1314 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1315 | "Epoch 453/500\n",
1316 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0041\n",
1317 | "Epoch 454/500\n",
1318 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n",
1319 | "Epoch 455/500\n",
1320 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0034\n",
1321 | "Epoch 456/500\n",
1322 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0019\n",
1323 | "Epoch 457/500\n",
1324 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0025\n",
1325 | "Epoch 458/500\n",
1326 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1327 | "Epoch 459/500\n",
1328 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n",
1329 | "Epoch 460/500\n",
1330 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0025\n",
1331 | "Epoch 461/500\n",
1332 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0022\n",
1333 | "Epoch 462/500\n",
1334 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0031\n",
1335 | "Epoch 463/500\n",
1336 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0044\n",
1337 | "Epoch 464/500\n",
1338 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n",
1339 | "Epoch 465/500\n",
1340 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0031\n",
1341 | "Epoch 466/500\n",
1342 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1343 | "Epoch 467/500\n",
1344 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1345 | "Epoch 468/500\n",
1346 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0013\n",
1347 | "Epoch 469/500\n",
1348 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0047\n",
1349 | "Epoch 470/500\n",
1350 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0044\n",
1351 | "Epoch 471/500\n",
1352 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0085\n",
1353 | "Epoch 472/500\n",
1354 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0100\n",
1355 | "Epoch 473/500\n",
1356 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0163\n",
1357 | "Epoch 474/500\n",
1358 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0157\n",
1359 | "Epoch 475/500\n",
1360 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0059\n",
1361 | "Epoch 476/500\n",
1362 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0053\n",
1363 | "Epoch 477/500\n",
1364 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0066\n",
1365 | "Epoch 478/500\n",
1366 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0078\n",
1367 | "Epoch 479/500\n",
1368 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0091\n",
1369 | "Epoch 480/500\n",
1370 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0138\n",
1371 | "Epoch 481/500\n",
1372 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0066\n",
1373 | "Epoch 482/500\n",
1374 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0081\n",
1375 | "Epoch 483/500\n",
1376 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0119\n",
1377 | "Epoch 484/500\n",
1378 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0185\n",
1379 | "Epoch 485/500\n",
1380 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0100\n",
1381 | "Epoch 486/500\n",
1382 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0138\n",
1383 | "Epoch 487/500\n",
1384 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0238\n",
1385 | "Epoch 488/500\n",
1386 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0122\n",
1387 | "Epoch 489/500\n",
1388 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0110\n",
1389 | "Epoch 490/500\n",
1390 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0150\n",
1391 | "Epoch 491/500\n",
1392 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0041\n",
1393 | "Epoch 492/500\n",
1394 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 9.3926e-04\n",
1395 | "Epoch 493/500\n",
1396 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0022\n",
1397 | "Epoch 494/500\n",
1398 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0050\n",
1399 | "Epoch 495/500\n",
1400 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0053\n",
1401 | "Epoch 496/500\n",
1402 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0028\n",
1403 | "Epoch 497/500\n",
1404 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0047\n",
1405 | "Epoch 498/500\n",
1406 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0128\n",
1407 | "Epoch 499/500\n",
1408 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0185\n",
1409 | "Epoch 500/500\n",
1410 | "3194/3194 [==============================] - 39s 12ms/sample - loss: inf - accuracy: 0.0222\n"
1411 | ]
1412 | }
1413 | ],
1414 | "source": [
1415 | "# train asr model\n",
1416 | "history = pipeline.fit(train_dataset = train_data, batch_size=128, epochs=500)\n",
1417 | "\n",
1418 | "# history = pipeline.fit_iter(train_dataset = train_data, batch_size=32, epochs=3,iter_num=500,checkpoint=project_path+'checkpoints')\n",
1419 | "# history = pipeline.fit_generator(train_dataset = train_data, batch_size=32, epochs=500)"
1420 | ]
1421 | },
1422 | {
1423 | "cell_type": "code",
1424 | "execution_count": null,
1425 | "metadata": {
1426 | "colab": {},
1427 | "colab_type": "code",
1428 | "id": "5WbeF-OWwhZB"
1429 | },
1430 | "outputs": [],
1431 | "source": [
1432 | "# save deepasr ctc pipeline\n",
1433 | "pipeline.save(project_path+'checkpoints')"
1434 | ]
1435 | },
1436 | {
1437 | "cell_type": "markdown",
1438 | "metadata": {
1439 | "colab_type": "text",
1440 | "id": "o_psolNH4XFl"
1441 | },
1442 | "source": [
1443 | "# 4. Model testing"
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "code",
1448 | "execution_count": 12,
1449 | "metadata": {
1450 | "colab": {
1451 | "base_uri": "https://localhost:8080/",
1452 | "height": 34
1453 | },
1454 | "colab_type": "code",
1455 | "id": "EEgiUEkVc07E",
1456 | "outputId": "fb6184a0-c0d9-4fe9-f445-37477d4661ff"
1457 | },
1458 | "outputs": [
1459 | {
1460 | "name": "stdout",
1461 | "output_type": "stream",
1462 | "text": [
1463 | "WARNING:tensorflow:No training configuration found in save file: the model was *not* compiled. Compile it manually.\n"
1464 | ]
1465 | }
1466 | ],
1467 | "source": [
1468 | "# load saved ctc pipeline\n",
1469 | "pipeline1 = asr.pipeline.load(project_path+'checkpoints')"
1470 | ]
1471 | },
1472 | {
1473 | "cell_type": "code",
1474 | "execution_count": 13,
1475 | "metadata": {
1476 | "colab": {
1477 | "base_uri": "https://localhost:8080/",
1478 | "height": 67
1479 | },
1480 | "colab_type": "code",
1481 | "id": "xkPo_3SMtzHp",
1482 | "outputId": "bcdc7fa7-6852-4aa8-99e3-6db3d8508ada"
1483 | },
1484 | "outputs": [
1485 | {
1486 | "name": "stdout",
1487 | "output_type": "stream",
1488 | "text": [
1489 | "Audio File: ./LibriSpeech/train-clean-100/27/124992/27-124992-0063.flac\n",
1490 | "Audio Transcription: WENT THROUGH THE PLAINS BUT WHEN THEY CAME NEAR THE MOUNTAINS\n",
1491 | "Transcript length: 61\n"
1492 | ]
1493 | }
1494 | ],
1495 | "source": [
1496 | "# get testing audio and transcript from dataset\n",
1497 | "index = np.random.randint(train_data.shape[0])\n",
1498 | "data = train_data.iloc[index]\n",
1499 | "test_file = data[0]\n",
1500 | "test_transcript = data[1]\n",
1501 | "# Audio file\n",
1502 | "print(\"Audio File:\",test_file)\n",
1503 | "# ground truth\n",
1504 | "print(\"Audio Transcription:\", test_transcript)\n",
1505 | "print(\"Transcript length:\",len(test_transcript))"
1506 | ]
1507 | },
1508 | {
1509 | "cell_type": "code",
1510 | "execution_count": 14,
1511 | "metadata": {
1512 | "colab": {
1513 | "base_uri": "https://localhost:8080/",
1514 | "height": 87
1515 | },
1516 | "colab_type": "code",
1517 | "id": "moqXWTQVvdxC",
1518 | "outputId": "0645ce28-1da9-447e-cc9d-93a4f57096c8"
1519 | },
1520 | "outputs": [
1521 | {
1522 | "name": "stdout",
1523 | "output_type": "stream",
1524 | "text": [
1525 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/backend.py:5811: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.\n",
1526 | "Instructions for updating:\n",
1527 | "Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.\n"
1528 | ]
1529 | }
1530 | ],
1531 | "source": [
1532 | "# predict labels\n",
1533 | "pred= pipeline1.predict(test_file)"
1534 | ]
1535 | },
1536 | {
1537 | "cell_type": "code",
1538 | "execution_count": 15,
1539 | "metadata": {
1540 | "colab": {
1541 | "base_uri": "https://localhost:8080/",
1542 | "height": 34
1543 | },
1544 | "colab_type": "code",
1545 | "id": "oNvRyWq8weZs",
1546 | "outputId": "88732c3b-5412-4a84-bb25-d517d4251a8c"
1547 | },
1548 | "outputs": [
1549 | {
1550 | "data": {
1551 | "text/plain": [
1552 | "'WENT THROUGH THE PLAINS BUT WHEN THEY CAME NEAR THE MOUNTAINS'"
1553 | ]
1554 | },
1555 | "execution_count": 15,
1556 | "metadata": {
1557 | "tags": []
1558 | },
1559 | "output_type": "execute_result"
1560 | }
1561 | ],
1562 | "source": [
1563 | "pred[0].upper()"
1564 | ]
1565 | },
1566 | {
1567 | "cell_type": "code",
1568 | "execution_count": null,
1569 | "metadata": {
1570 | "colab": {},
1571 | "colab_type": "code",
1572 | "id": "AFMoK13mtR6V"
1573 | },
1574 | "outputs": [],
1575 | "source": []
1576 | }
1577 | ],
1578 | "metadata": {
1579 | "accelerator": "GPU",
1580 | "colab": {
1581 | "collapsed_sections": [],
1582 | "machine_shape": "hm",
1583 | "name": "DeepAsr-CTC_Pipeline.ipynb",
1584 | "provenance": []
1585 | },
1586 | "kernelspec": {
1587 | "display_name": "Python 3",
1588 | "language": "python",
1589 | "name": "python3"
1590 | },
1591 | "language_info": {
1592 | "codemirror_mode": {
1593 | "name": "ipython",
1594 | "version": 3
1595 | },
1596 | "file_extension": ".py",
1597 | "mimetype": "text/x-python",
1598 | "name": "python",
1599 | "nbconvert_exporter": "python",
1600 | "pygments_lexer": "ipython3",
1601 | "version": "3.7.5"
1602 | }
1603 | },
1604 | "nbformat": 4,
1605 | "nbformat_minor": 4
1606 | }
1607 |
--------------------------------------------------------------------------------