├── _config.yml ├── .gitignore ├── utils ├── __init__.py ├── test │ ├── __init__.py │ ├── 098569.mp3 │ └── ftest_stftForTheInpaintingSetting.py ├── legacy │ ├── __init__.py │ ├── notebooks │ │ ├── __init__.py │ │ ├── train.ipynb │ │ ├── try.ipynb │ │ └── test.ipynb │ ├── simulations │ │ ├── __init__.py │ │ ├── nets.ods │ │ ├── simple.py │ │ ├── stft_istft_tfReconstructionTest.py │ │ ├── runNatBigger.py │ │ ├── runNatNatBigger.py │ │ ├── runNatStftTest.py │ │ ├── runNatStftMagnitudeTest.py │ │ ├── runNatStftRealImagTest.py │ │ ├── runNatStftSeventh.py │ │ ├── runNatStftEigth.py │ │ ├── runNatStftSixth.py │ │ ├── runNatMagPhaseGapTest.py │ │ ├── runNatStftSec.py │ │ ├── runNatStftGapTest.py │ │ ├── runNatStftGapToMagTest.py │ │ ├── runNatStftThird.py │ │ ├── runNatStftFifth.py │ │ ├── runNatStftGapOneOneTest.py │ │ ├── runNatStftGapBIGTest.py │ │ ├── runNat.py │ │ ├── runNatSkip.py │ │ └── runNatBig.py │ ├── plotSummary.py │ ├── stftPhaseContextEncoder.py │ ├── evaluationWriter.py │ ├── timeLiner.py │ ├── stftGapContextEncoder.py │ └── stftRealImagContextEncoder.py ├── logdir │ └── readme.md ├── saved_models │ └── readme.md ├── strechableNumpyArray.py ├── colorize.py ├── tfReader.py └── saveParameters.py ├── network ├── __init__.py └── emptyTFGraph.py ├── system ├── __init__.py ├── dnnSystem.py ├── preAndPostProcessor.py ├── magPreAndPostProcessor.py └── contextEncoderSystem.py ├── architecture ├── __init__.py ├── parameters │ ├── __init__.py │ ├── fullyLayerParams.py │ ├── convNetworkParams.py │ └── contextEncoderParameters.py ├── architecture.py ├── channelWiseContextEncoderArchitecture.py └── contextEncoderArchitecture.py ├── datasetGenerator ├── __init__.py ├── fmaTFRecordGenerator.py ├── nSynthTFRecordGenerator.py ├── nSynthDownloader.py ├── fmaDownloader.py ├── fakeTFRecordGenerator.py ├── downloader.py ├── exampleProcessor.py └── tfRecordGenerator.py ├── images ├── Nsynth_2.png ├── Nsynth_3.png ├── Nsynth_6.png ├── Nsynth_7.png ├── Nsynth_12.png ├── Nsynth_13.png ├── Nsynth_17.png ├── Nsynth_67.png ├── decoder-signal.jpg ├── encoder-signal.jpg └── good-spectrogram.png ├── requirements.txt ├── complex_network_parameters.pkl ├── magnitude_network_parameters.pkl ├── audio_examples ├── faded │ ├── nsynth_17_or.mp3 │ ├── nsynth_20_or.mp3 │ ├── nsynth_3_or.mp3 │ ├── nsynth_3_rec.mp3 │ ├── nsynth_17_rec.mp3 │ ├── nsynth_20_rec.mp3 │ ├── nsynth_17_complex_rec.mp3 │ ├── nsynth_20_complex_rec.mp3 │ └── nsynth_3_complex_rec.mp3 ├── good │ ├── nsynth_14_or.mp3 │ ├── nsynth_14_rec.mp3 │ ├── nsynth_15_or.mp3 │ ├── nsynth_15_rec.mp3 │ ├── nsynth_16_or.mp3 │ ├── nsynth_16_rec.mp3 │ ├── nsynth_2_or.mp3 │ ├── nsynth_2_rec.mp3 │ ├── nsynth_4_or.mp3 │ ├── nsynth_4_rec.mp3 │ ├── nsynth_5_or.mp3 │ ├── nsynth_5_rec.mp3 │ ├── nsynth_67_or.mp3 │ ├── nsynth_67_rec.mp3 │ ├── nsynth_6_or.mp3 │ ├── nsynth_6_rec.mp3 │ ├── nsynth_7_or.mp3 │ ├── nsynth_7_rec.mp3 │ ├── nsynth_8_or.mp3 │ ├── nsynth_8_rec.mp3 │ ├── nsynth_2_complex_rec.mp3 │ ├── nsynth_4_complex_rec.mp3 │ ├── nsynth_5_complex_rec.mp3 │ ├── nsynth_6_complex_rec.mp3 │ ├── nsynth_7_complex_rec.mp3 │ ├── nsynth_8_complex_rec.mp3 │ ├── nsynth_14_complex_rec.mp3 │ ├── nsynth_15_complex_rec.mp3 │ ├── nsynth_16_complex_rec.mp3 │ └── nsynth_67_complex_rec.mp3 └── noisy │ ├── nsynth_12_or.mp3 │ ├── nsynth_13_or.mp3 │ ├── nsynth_18_or.mp3 │ ├── nsynth_12_rec.mp3 │ ├── nsynth_13_rec.mp3 │ ├── nsynth_18_rec.mp3 │ ├── nsynth_12_complex_rec.mp3 │ ├── nsynth_13_complex_rec.mp3 │ └── nsynth_18_complex_rec.mp3 ├── LPC-based extrapolation ├── mySNR.m ├── lpcPaper.m └── 
lpcInFolder.m ├── make_fakedataset.py ├── trainComplexNetwork.py ├── make_nsynthdataset.py ├── trainMagnitudeNetwork.py ├── make_fmadataset.py ├── README.md └── SpecDivExperimentMag.m /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /system/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /utils/test/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /architecture/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /utils/legacy/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /datasetGenerator/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /architecture/parameters/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /utils/legacy/notebooks/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /utils/legacy/simulations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | -------------------------------------------------------------------------------- /utils/logdir/readme.md: -------------------------------------------------------------------------------- 1 | This folder contains the logs that are saved in the project -------------------------------------------------------------------------------- /utils/saved_models/readme.md: -------------------------------------------------------------------------------- 1 | This folder contains the models that are saved in the project -------------------------------------------------------------------------------- /images/Nsynth_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_2.png 
-------------------------------------------------------------------------------- /images/Nsynth_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_3.png -------------------------------------------------------------------------------- /images/Nsynth_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_6.png -------------------------------------------------------------------------------- /images/Nsynth_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_7.png -------------------------------------------------------------------------------- /images/Nsynth_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_12.png -------------------------------------------------------------------------------- /images/Nsynth_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_13.png -------------------------------------------------------------------------------- /images/Nsynth_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_17.png -------------------------------------------------------------------------------- /images/Nsynth_67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/Nsynth_67.png -------------------------------------------------------------------------------- /utils/test/098569.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/utils/test/098569.mp3 -------------------------------------------------------------------------------- /images/decoder-signal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/decoder-signal.jpg -------------------------------------------------------------------------------- /images/encoder-signal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/encoder-signal.jpg -------------------------------------------------------------------------------- /images/good-spectrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/images/good-spectrogram.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow_gpu==1.4.0 2 | librosa==0.5.1 3 | audioread==2.1.5 4 | matplotlib==2.1.0 5 | numpy==1.14.1 6 | 7 | -------------------------------------------------------------------------------- /complex_network_parameters.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/complex_network_parameters.pkl -------------------------------------------------------------------------------- /magnitude_network_parameters.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/magnitude_network_parameters.pkl -------------------------------------------------------------------------------- /utils/legacy/simulations/nets.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/utils/legacy/simulations/nets.ods -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_17_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_17_or.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_20_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_20_or.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_3_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_3_or.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_3_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_3_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_14_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_14_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_14_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_14_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_15_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_15_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_15_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_15_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_16_or.mp3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_16_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_16_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_16_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_2_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_2_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_2_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_2_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_4_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_4_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_4_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_4_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_5_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_5_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_5_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_5_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_67_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_67_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_67_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_67_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_6_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_6_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_6_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_6_rec.mp3 
-------------------------------------------------------------------------------- /audio_examples/good/nsynth_7_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_7_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_7_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_7_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_8_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_8_or.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_8_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_8_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_12_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_12_or.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_13_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_13_or.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_18_or.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_18_or.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_17_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_17_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_20_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_20_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_12_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_12_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_13_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_13_rec.mp3 -------------------------------------------------------------------------------- 
/audio_examples/noisy/nsynth_18_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_18_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_2_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_2_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_4_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_4_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_5_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_5_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_6_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_6_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_7_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_7_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_8_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_8_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_17_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_17_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_20_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_20_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/faded/nsynth_3_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/faded/nsynth_3_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_14_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_14_complex_rec.mp3 
-------------------------------------------------------------------------------- /audio_examples/good/nsynth_15_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_15_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_16_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_16_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/good/nsynth_67_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/good/nsynth_67_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_12_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_12_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_13_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_13_complex_rec.mp3 -------------------------------------------------------------------------------- /audio_examples/noisy/nsynth_18_complex_rec.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andimarafioti/audioContextEncoder/HEAD/audio_examples/noisy/nsynth_18_complex_rec.mp3 -------------------------------------------------------------------------------- /LPC-based extrapolation/mySNR.m: -------------------------------------------------------------------------------- 1 | function [result] = mySNR(orig_signal, inpainted) 2 | 3 | norm_orig = norm(orig_signal); 4 | norm_difference = norm(orig_signal-inpainted); 5 | result = 10*log10(abs(norm_orig^2)/(abs(norm_difference^2))); 6 | end -------------------------------------------------------------------------------- /utils/legacy/simulations/simple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | fs = 16000 5 | time = np.arange(0, 0.005, 1/fs) 6 | plt.plot(np.sin(2 * np.pi *440 * time , dtype=np.float32) + np.random.normal(0, 0.1, len(time))) 7 | plt.show() -------------------------------------------------------------------------------- /datasetGenerator/fmaTFRecordGenerator.py: -------------------------------------------------------------------------------- 1 | from datasetGenerator.tfRecordGenerator import TFRecordGenerator 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class FMATFRecordGenerator(TFRecordGenerator): 7 | def _filenameShouldBeLoaded(self, filename): 8 | return filename.endswith('.mp3') 9 | -------------------------------------------------------------------------------- /make_fakedataset.py: -------------------------------------------------------------------------------- 1 | from datasetGenerator.exampleProcessor import ExampleProcessor 2 | from 
datasetGenerator.fakeTFRecordGenerator import FakeTFRecordGenerator 3 | 4 | __author__ = 'Andres' 5 | 6 | 7 | exampleProcessor = ExampleProcessor(gapLength=1024, sideLength=2048, hopSize=512, gapMinRMS=1e-3) 8 | 9 | tfRecordGenerator = FakeTFRecordGenerator(baseName='fake', pathToDataFolder='', exampleProcessor=exampleProcessor) 10 | tfRecordGenerator.generateDataset() 11 | -------------------------------------------------------------------------------- /datasetGenerator/nSynthTFRecordGenerator.py: -------------------------------------------------------------------------------- 1 | from datasetGenerator.tfRecordGenerator import TFRecordGenerator 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class NSynthTFRecordGenerator(TFRecordGenerator): 7 | def _filenameShouldBeLoaded(self, filename): 8 | return filename.endswith('.wav') 9 | 10 | 11 | if __name__ == "__main__": 12 | from datasetGenerator.exampleProcessor import ExampleProcessor 13 | 14 | exampleProcessor = ExampleProcessor() 15 | tfRecordGen = NSynthTFRecordGenerator(baseName='test', pathToDataFolder='nsynth-test/audio', exampleProcessor=exampleProcessor) 16 | tfRecordGen.generateDataset() 17 | 18 | 19 | -------------------------------------------------------------------------------- /network/emptyTFGraph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from network.tfGraph import TFGraph 3 | 4 | __author__ = 'Andres' 5 | 6 | 7 | class EmptyTfGraph(TFGraph): 8 | """ 9 | This class represents a TensorFlow graph. 10 | It is initialized empty and one can add different types of layers to it. 11 | The output of the network is accessed with output(). 12 | The input of the network is a placeholder and is accessed with input(). 13 | 14 | shapeOfInput : shape of the input (including batch size) 15 | """ 16 | 17 | def __init__(self, shapeOfInput, isTraining, name): 18 | inputSignal = tf.placeholder(tf.float32, shape=shapeOfInput, name='input_data') 19 | super().__init__(inputSignal=inputSignal, isTraining=isTraining, name=name) 20 | -------------------------------------------------------------------------------- /architecture/parameters/fullyLayerParams.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class FullyLayerParams(object): 7 | def __init__(self, inputShape, outputShape, name): 8 | assert inputShape[0] == outputShape[0], 'Batch size is expected to be the first element in the shapes' 9 | 10 | self._inputShape = inputShape 11 | self._outputShape = outputShape 12 | self._name = name 13 | 14 | def inputShape(self): 15 | return self._inputShape 16 | 17 | def outputShape(self): 18 | return self._outputShape 19 | 20 | def name(self): 21 | return self._name 22 | 23 | def batchSize(self): 24 | return self._inputShape[0] 25 | 26 | def inputChannels(self): 27 | return np.prod(self._inputShape[1:]) 28 | 29 | def outputChannels(self): 30 | return np.prod(self._outputShape[1:]) 31 | -------------------------------------------------------------------------------- /architecture/parameters/convNetworkParams.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | 3 | 4 | class ConvNetworkParams(object): 5 | def __init__(self, filterShapes, channels, strides, name): 6 | self._filterShapes = filterShapes 7 | self._channels = channels 8 | self._strides = strides 9 | self._name = name 10 | 11 | def filterShapes(self): 12 | return 
self._filterShapes 13 | 14 | def channels(self): 15 | return self._channels 16 | 17 | def inputChannels(self): 18 | return self._channels[:-1] 19 | 20 | def outputChannels(self): 21 | return self._channels[1:] 22 | 23 | def strides(self): 24 | return self._strides 25 | 26 | def name(self): 27 | return self._name 28 | 29 | def layerCount(self): 30 | return len(self._strides) 31 | 32 | def convNames(self): 33 | return ["Conv_"+str(index) for index in range(self.layerCount())] 34 | -------------------------------------------------------------------------------- /trainComplexNetwork.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from architecture.contextEncoderArchitecture import ContextEncoderArchitecture 4 | from system.contextEncoderSystem import ContextEncoderSystem 5 | from system.preAndPostProcessor import PreAndPostProcessor 6 | 7 | architecturesParametersFile = "complex_network_parameters.pkl" 8 | sessionsName = "complex_network" 9 | 10 | with open(architecturesParametersFile, 'rb') as savedFile: 11 | Context_Encoder_parameters = pickle.load(savedFile) 12 | 13 | aContextEncoderArchitecture = ContextEncoderArchitecture(*Context_Encoder_parameters.architectureParameters()) 14 | aPreProcessor = PreAndPostProcessor(*Context_Encoder_parameters.preProcessorParameters()) 15 | aContextEncoderSystem = ContextEncoderSystem(aContextEncoderArchitecture, Context_Encoder_parameters.batchSize(), 16 | aPreProcessor, sessionsName) 17 | aContextEncoderSystem.train("nsynth_train_w5120_g1024_h512.tfrecords", "nsynth_valid_w5120_g1024_h512.tfrecords", 1e-3) 18 | -------------------------------------------------------------------------------- /make_nsynthdataset.py: -------------------------------------------------------------------------------- 1 | from datasetGenerator.exampleProcessor import ExampleProcessor 2 | from datasetGenerator.nSynthDownloader import NSynthDownloader 3 | from datasetGenerator.nSynthTFRecordGenerator import NSynthTFRecordGenerator 4 | 5 | __author__ = 'Andres' 6 | 7 | 8 | downloader = NSynthDownloader() 9 | downloader.downloadAndExtract() 10 | 11 | exampleProcessor = ExampleProcessor(gapLength=1024, sideLength=2048, hopSize=512, gapMinRMS=1e-3) 12 | 13 | tfRecordGenerator = NSynthTFRecordGenerator(baseName='nsynth_test', pathToDataFolder=downloader.TEST_DIR, exampleProcessor=exampleProcessor) 14 | tfRecordGenerator.generateDataset() 15 | 16 | tfRecordGenerator = NSynthTFRecordGenerator(baseName='nsynth_valid', pathToDataFolder=downloader.VALID_DIR, exampleProcessor=exampleProcessor) 17 | tfRecordGenerator.generateDataset() 18 | 19 | tfRecordGenerator = NSynthTFRecordGenerator(baseName='nsynth_train', pathToDataFolder=downloader.TRAIN_DIR, exampleProcessor=exampleProcessor) 20 | tfRecordGenerator.generateDataset() 21 | -------------------------------------------------------------------------------- /utils/strechableNumpyArray.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class StrechableNumpyArray(object): 7 | """When trying to add values to a numpy array, things can get slow if the array is too large. 
8 | This class amortizes that cost by preallocating a buffer and growing it geometrically when it fills up.""" 9 | def __init__(self, dtype=np.float32): 10 | self._dtype = dtype 11 | self.data = np.zeros((1000000,), dtype=self._dtype) 12 | self.size = 0 13 | 14 | def append(self, x): 15 | if self.size + len(x) >= len(self.data): 16 | capacity = max(4 * len(self.data), self.size + len(x)) # grow geometrically, but always enough to fit x 17 | newdata = np.zeros((capacity,), dtype=self._dtype) 18 | newdata[:self.size] = self.data[:self.size] 19 | self.data = newdata 20 | 21 | self.data[self.size: self.size + len(x)] = x 22 | self.size += len(x) 23 | 24 | def finalize(self): 25 | output_data = self.data[:self.size] 26 | del self.data 27 | return output_data 28 | -------------------------------------------------------------------------------- /trainMagnitudeNetwork.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from architecture.contextEncoderArchitecture import ContextEncoderArchitecture 4 | from system.contextEncoderSystem import ContextEncoderSystem 5 | from system.magPreAndPostProcessor import MagPreAndPostProcessor 6 | 7 | architecturesParametersFile = "magnitude_network_parameters.pkl" 8 | sessionsName = "magnitude_network" 9 | 10 | with open(architecturesParametersFile, 'rb') as savedFile: 11 | Context_Encoder_parameters = pickle.load(savedFile) 12 | 13 | aContextEncoderArchitecture = ContextEncoderArchitecture(*Context_Encoder_parameters.architectureParameters()) 14 | aPreProcessor = MagPreAndPostProcessor(*Context_Encoder_parameters.preProcessorParameters()) 15 | aContextEncoderSystem = ContextEncoderSystem(aContextEncoderArchitecture, Context_Encoder_parameters.batchSize(), 16 | aPreProcessor, sessionsName) 17 | aContextEncoderSystem.train("nsynth_train_w5120_g1024_h512.tfrecords", "nsynth_valid_w5120_g1024_h512.tfrecords", 1e-3) 18 | -------------------------------------------------------------------------------- /make_fmadataset.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | from audioread import NoBackendError 3 | 4 | from datasetGenerator.exampleProcessor import ExampleProcessor 5 | from datasetGenerator.fmaDownloader import FMADownloader 6 | from datasetGenerator.fmaTFRecordGenerator import FMATFRecordGenerator 7 | 8 | __author__ = 'Andres' 9 | 10 | try: # Test the backend for mp3 files 11 | librosa.load("utils/test/098569.mp3") 12 | except NoBackendError as e: 13 | raise e 14 | 15 | downloader = FMADownloader() 16 | downloader.downloadAndExtract() 17 | 18 | exampleProcessor = ExampleProcessor(gapLength=1024, sideLength=2048, hopSize=512, gapMinRMS=1e-3) 19 | 20 | tfRecordGenerator = FMATFRecordGenerator(baseName='FMA-test', pathToDataFolder=downloader.TEST_DIR, exampleProcessor=exampleProcessor) 21 | tfRecordGenerator.generateDataset() 22 | 23 | tfRecordGenerator = FMATFRecordGenerator(baseName='FMA-valid', pathToDataFolder=downloader.VALID_DIR, exampleProcessor=exampleProcessor) 24 | tfRecordGenerator.generateDataset() 25 | 26 | tfRecordGenerator = FMATFRecordGenerator(baseName='FMA-train', pathToDataFolder=downloader.TRAIN_DIR, exampleProcessor=exampleProcessor) 27 | tfRecordGenerator.generateDataset() 28 | -------------------------------------------------------------------------------- /architecture/architecture.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class Architecture(object): 7 | def __init__(self): 8 | self._isTraining = 
tf.placeholder(tf.bool, name='is_training') 9 | self._input = tf.placeholder(tf.float32, shape=self.inputShape(), name='input_data') 10 | self._output = self._network(self._input) 11 | self._target = tf.placeholder(tf.float32, shape=self._output.shape, name='target_data') 12 | self._lossSummaries = [] 13 | self._loss = self._lossGraph() 14 | 15 | def input(self): 16 | return self._input 17 | 18 | def output(self): 19 | return self._output 20 | 21 | def target(self): 22 | return self._target 23 | 24 | def loss(self): 25 | return self._loss 26 | 27 | def lossSummaries(self): 28 | return self._lossSummaries 29 | 30 | def isTraining(self): 31 | return self._isTraining 32 | 33 | def _lossGraph(self): 34 | raise NotImplementedError("Subclass Responsibility") 35 | 36 | def _network(self, data): 37 | raise NotImplementedError("Subclass Responsibility") 38 | 39 | def inputShape(self): 40 | raise NotImplementedError("Subclass Responsibility") 41 | -------------------------------------------------------------------------------- /utils/legacy/plotSummary.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | import io 5 | import tensorflow as tf 6 | 7 | __author__ = 'Andres' 8 | 9 | 10 | class PlotSummary(object): 11 | def __init__(self, name): 12 | self._name = name 13 | self._placeholder = tf.placeholder(tf.uint8, (None, None, None, None)) 14 | self._summary = tf.summary.image(name, self._placeholder) 15 | self._image = None 16 | 17 | def produceSummaryToWrite(self, session): 18 | decoded_image = session.run(self._image) 19 | feed_dict = {self._placeholder: decoded_image} 20 | return session.run(self._summary, feed_dict=feed_dict) 21 | 22 | def plotSideBySide(self, out_gaps, reconstructed): 23 | f, axarr = plt.subplots(4, 2, sharey='row') 24 | f.set_size_inches(14, 24) 25 | stop_value = 4 26 | for i in range(0, stop_value): 27 | axarr[i, 0].plot(out_gaps[i]) 28 | axarr[i, 1].plot(reconstructed[i]) 29 | 30 | buf = io.BytesIO() 31 | plt.savefig(buf, format='png') 32 | plt.close() 33 | buf.seek(0) 34 | image = tf.image.decode_png(buf.getvalue(), channels=4) 35 | image = tf.expand_dims(image, 0) 36 | self._image = image -------------------------------------------------------------------------------- /datasetGenerator/nSynthDownloader.py: -------------------------------------------------------------------------------- 1 | from datasetGenerator.downloader import Downloader 2 | 3 | __author__ = 'Andres' 4 | 5 | 6 | class NSynthDownloader(Downloader): 7 | TRAIN_LINK = "http://download.magenta.tensorflow.org/datasets/nsynth/nsynth-train.jsonwav.tar.gz" 8 | VALID_LINK = "http://download.magenta.tensorflow.org/datasets/nsynth/nsynth-valid.jsonwav.tar.gz" 9 | TEST_LINK = "http://download.magenta.tensorflow.org/datasets/nsynth/nsynth-test.jsonwav.tar.gz" 10 | 11 | TRAIN_FILENAME = "nsynth_train.tar.gz" 12 | VALID_FILENAME = "nsynth_valid.tar.gz" 13 | TEST_FILENAME = "nsynth_test.tar.gz" 14 | 15 | TRAIN_DIR = "nsynth-train/audio" 16 | VALID_DIR = "nsynth-valid/audio" 17 | TEST_DIR = "nsynth-test/audio" 18 | 19 | def _downloadLinksAndFilenames(self): 20 | return [(self.TEST_LINK, self.TEST_FILENAME), 21 | (self.TRAIN_LINK, self.TRAIN_FILENAME), 22 | (self.VALID_LINK, self.VALID_FILENAME)] 23 | 24 | def _extractCompressedFile(self, filename): 25 | self._extractTar(filename) 26 | 27 | def _divideDataIntoTrainValidAndTestSubsets(self): 28 | print('NSynth dataset comes divided into training, validation 
and testing subsets.') 29 | 30 | if __name__ == "__main__": 31 | down = NSynthDownloader() 32 | down.downloadAndExtract() 33 | -------------------------------------------------------------------------------- /LPC-based extrapolation/lpcPaper.m: -------------------------------------------------------------------------------- 1 | contextLength = 2048; 2 | targetLength = 1024; 3 | contextRatio = ceil(contextLength/targetLength); 4 | maxLag = 1000; 5 | 6 | audioFilePath = 'audio/bass_electronic_018-045-075.wav'; 7 | [audio, Fs] = audioread(audioFilePath); 8 | 9 | t = linspace(0, pi/2, targetLength)'; 10 | sqCos = cos(t).^2; % squared-cosine crossfade window 11 | 12 | rec_signal = []; 13 | SNR = []; 14 | 15 | for i = contextRatio:(length(audio)/targetLength)-contextRatio-2 16 | previous_sig = audio(targetLength*(i-contextRatio)+1:targetLength*(i)); 17 | target_sig = audio(targetLength*(i)+1:targetLength*(i+1)); 18 | next_sig = audio(targetLength*(i+1)+1:targetLength*(i+contextRatio+1)); 19 | 20 | if rms(target_sig) < 1e-4 % skip (near-)silent gaps 21 | SNR(length(SNR)+1) = -1; 22 | rec_signal = cat(1, rec_signal, zeros([targetLength, 1])); 23 | continue 24 | end 25 | 26 | ab = arburg(previous_sig, maxLag); % forward prediction: Burg AR model fitted on the preceding context 27 | Zb = filtic(1,ab,previous_sig(end-(0:(maxLag-1)))); 28 | forw_pred = filter(1,ab,zeros(1,targetLength),Zb)'; 29 | 30 | next_sig = flipud(next_sig); % backward prediction: AR model fitted on the time-reversed following context 31 | af = arburg(next_sig, maxLag); 32 | Zf = filtic(1,af, next_sig(end-(0:(maxLag-1)))); 33 | backw_pred = flipud(filter(1,af,zeros(1,targetLength),Zf)'); 34 | 35 | sigout = sqCos.*forw_pred + flipud(sqCos).*backw_pred; % crossfade the two predictions across the gap 36 | rec_signal = cat(1, rec_signal, sigout); 37 | SNR(length(SNR)+1) = mySNR(target_sig, sigout); 38 | end 39 | 40 | 41 | fprintf('mean SNR where it was calculated is %f \n', mean(SNR(SNR~=-1))); 42 | fprintf('max SNR is %f \n', max(SNR)); 43 | fprintf('SNR is not calculated at %d places \n', length(find(SNR==-1))); 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Audio inpainting with a context encoder 2 | 3 | This project accompanies the research on audio inpainting of small gaps carried out at the Acoustics Research Institute in Vienna in collaboration with the Swiss Data Science Center. The paper was [published in IEEE TASLP](https://ieeexplore.ieee.org/document/8867915). 4 | 5 | # Installation 6 | 7 | Install the requirements with `pip install -r requirements.txt`. For Windows users, the numpy version should be 1.14.0+mkl (find it [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/)). For the FMA dataset, librosa requires ffmpeg as an mp3 backend. 8 | 9 | # Instructions 10 | The paper uses both Google's NSynth dataset and the FMA dataset. To recreate the datasets used in the paper, run either `python make_nsynthdataset.py` or `python make_fmadataset.py` from the parent folder. Each script outputs three `tfrecord` files, for training, validating and testing the model. 11 | 12 | The default network parameters come pickled in the files `magnitude_network_parameters.pkl` and `complex_network_parameters.pkl`. To define other architectures, use [saveParameters.py](utils/saveParameters.py). 13 | 14 | To train a network, run `python trainMagnitudeNetwork.py` or `python trainComplexNetwork.py` from the parent folder. This trains the chosen network for 600k steps with a learning rate of 1e-3. You can select which `tfrecord` files to train on; by default, the scripts assume you have created the NSynth dataset (see the sketch below). 
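As a rough illustration of retargeting the training to other records, the sketch below mirrors `trainComplexNetwork.py`. The FMA record names are an assumption extrapolated from the NSynth naming scheme, so check what `make_fmadataset.py` actually produced before running this:

```python
import pickle

from architecture.contextEncoderArchitecture import ContextEncoderArchitecture
from system.contextEncoderSystem import ContextEncoderSystem
from system.preAndPostProcessor import PreAndPostProcessor

# Load the pickled ContextEncoderParameters describing the architecture.
with open("complex_network_parameters.pkl", 'rb') as savedFile:
    parameters = pickle.load(savedFile)

architecture = ContextEncoderArchitecture(*parameters.architectureParameters())
preProcessor = PreAndPostProcessor(*parameters.preProcessorParameters())
system = ContextEncoderSystem(architecture, parameters.batchSize(), preProcessor, "complex_network_fma")

# Hypothetical FMA record names, assuming the same suffix as the generated NSynth records.
system.train("FMA-train_w5120_g1024_h512.tfrecords", "FMA-valid_w5120_g1024_h512.tfrecords", 1e-3)
```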
15 | 16 | ## Sound examples 17 | 18 | - To hear examples, please visit the [accompanying website](https://andimarafioti.github.io/audioContextEncoder/). 19 | -------------------------------------------------------------------------------- /architecture/parameters/contextEncoderParameters.py: -------------------------------------------------------------------------------- 1 | class ContextEncoderParameters(object): 2 | INPUT_CHANNELS = 4 # 2 sides, one for real and one for imag 3 | 4 | def __init__(self, batchSize, signalLength, gapLength, fftWindowLength, fftHopSize, 5 | encoderParameters, fullyConnectedLayerParameters, decoderParameters): 6 | self._batchSize = int(batchSize) 7 | self._signalLength = int(signalLength) 8 | self._gapLength = int(gapLength) 9 | self._fftWindowLength = int(fftWindowLength) 10 | self._fftHopSize = int(fftHopSize) 11 | self._encoderParameters = encoderParameters 12 | self._fullyConnectedLayerParameters = fullyConnectedLayerParameters 13 | self._decoderParameters = decoderParameters 14 | 15 | def architectureParameters(self): 16 | return [self.inputShape(), self._encoderParameters, self._decoderParameters, self._fullyConnectedLayerParameters] 17 | 18 | def preProcessorParameters(self): 19 | return [self._signalLength, self._gapLength, self._fftWindowLength, self._fftHopSize] 20 | 21 | def fftHopSize(self): 22 | return self._fftHopSize 23 | 24 | def fftWindowLength(self): 25 | return self._fftWindowLength 26 | 27 | def batchSize(self): 28 | return self._batchSize 29 | 30 | def inputShape(self): 31 | return self._batchSize, self.contextStftFrameCount(), self._fftFreqBins(), self.INPUT_CHANNELS 32 | 33 | def contextStftFrameCount(self): 34 | return int(((self._signalLength - self._gapLength) / 2) / self._fftHopSize) 35 | 36 | def _fftFreqBins(self): 37 | return self._fftWindowLength//2+1 38 | 39 | -------------------------------------------------------------------------------- /utils/colorize.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.cm 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | 7 | 8 | def colorize(value, vmin=None, vmax=None, cmap=None): 9 | """ 10 | A utility function for TensorFlow that maps a grayscale image to a matplotlib 11 | colormap for use with TensorBoard image summaries. 12 | By default it will normalize the input value to the range 0..1 before mapping 13 | to a grayscale colormap. 14 | Arguments: 15 | - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 16 | [height, width, 1]. 17 | - vmin: the minimum value of the range used for normalization. 18 | (Default: value minimum) 19 | - vmax: the maximum value of the range used for normalization. 20 | (Default: value maximum) 21 | - cmap: a valid cmap name for use with matplotlib's `get_cmap`. 22 | (Default: 'viridis') 23 | Example usage: 24 | ``` 25 | output = tf.random_uniform(shape=[256, 256, 1]) 26 | output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 27 | tf.summary.image('output', output_color) 28 | ``` 29 | 30 | Returns a 3D tensor of shape [height, width, 3]. 
31 | """ 32 | 33 | # normalize 34 | vmin = tf.reduce_min(value) if vmin is None else vmin 35 | vmax = tf.reduce_max(value) if vmax is None else vmax 36 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 37 | 38 | # squeeze last dim if it exists 39 | value = tf.squeeze(value) 40 | 41 | # quantize 42 | indices = tf.to_int32(tf.round(value * 255)) 43 | 44 | # gather 45 | cm = matplotlib.cm.get_cmap(cmap if cmap is not None else 'viridis') 46 | colors = cm(np.arange(256))[:, :3] 47 | colors = tf.constant(colors, dtype=tf.float32) 48 | value = tf.gather(colors, indices) 49 | 50 | return value 51 | -------------------------------------------------------------------------------- /datasetGenerator/fmaDownloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from datasetGenerator.downloader import Downloader 4 | 5 | __author__ = 'Andres' 6 | 7 | 8 | class FMADownloader(Downloader): 9 | SMALL_LINK = 'https://os.unil.cloud.switch.ch/fma/fma_small.zip' 10 | SMALL_FILENAME = 'fma_small.zip' 11 | SMALL_DIR = SMALL_FILENAME[:-4] 12 | 13 | TRAIN_DIR = 'FMA-train' 14 | VALID_DIR = 'FMA-valid' 15 | TEST_DIR = 'FMA-test' 16 | 17 | def _downloadLinksAndFilenames(self): 18 | return [(self.SMALL_LINK, self.SMALL_FILENAME)] 19 | 20 | def _extractCompressedFile(self, filename): 21 | self._extractZip(filename) 22 | 23 | def _divideDataIntoTrainValidAndTestSubsets(self): 24 | print('Dividing FMA dataset into training, validation and testing subsets.') 25 | for dir_name in [self.TRAIN_DIR, self.VALID_DIR, self.TEST_DIR]: 26 | try: 27 | os.mkdir(dir_name) 28 | except FileExistsError as e: 29 | print('Directory already existed, proceed with caution.\nException:', e) 30 | 31 | i = 0 32 | for path, directory_name, file_names in os.walk(self.SMALL_DIR): 33 | for file_name in file_names: 34 | i += 1 35 | if i < 8: 36 | os.rename(path + '/' + file_name, self.TRAIN_DIR + '/' + file_name) 37 | elif i < 10: 38 | os.rename(path + '/' + file_name, self.VALID_DIR + '/' + file_name) 39 | elif i == 10: 40 | os.rename(path + '/' + file_name, self.TEST_DIR + '/' + file_name) 41 | i = 0 42 | shutil.rmtree(self.SMALL_DIR) 43 | 44 | 45 | if __name__ == "__main__": 46 | down = FMADownloader() 47 | down.downloadAndExtract() 48 | -------------------------------------------------------------------------------- /datasetGenerator/fakeTFRecordGenerator.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import os 5 | import sys 6 | from datasetGenerator.tfRecordGenerator import TFRecordGenerator 7 | 8 | __author__ = 'Andres' 9 | 10 | 11 | class FakeTFRecordGenerator(TFRecordGenerator): 12 | def generateDataset(self): 13 | start = time.time() 14 | 15 | train_filename = self.name() + '.tfrecords' 16 | writer = tf.python_io.TFRecordWriter(train_filename) 17 | 18 | print("start:", start) 19 | count = 0 20 | total = 0 21 | 22 | _sampling_rate = 16000 23 | _window_size = 5120 24 | _time = np.arange(0, _window_size / _sampling_rate, 1 / _sampling_rate) 25 | _low_freq = np.arange(0, 2000, 40) 26 | _mid_low_freq = np.arange(2000, 4000, 40) 27 | _mid_high_freq = np.arange(4000, 6000, 40) 28 | _high_freq = np.arange(6000, 8000, 40) 29 | 30 | for low_freq in _low_freq: 31 | for mid_low_freq in _mid_low_freq: 32 | for mid_high_freq in _mid_high_freq: 33 | for high_freq in _high_freq: 34 | audio = np.sin(2 * np.pi * low_freq * _time) + np.sin(2 * np.pi * mid_low_freq * _time) + 
\ 35 | np.sin(2 * np.pi * mid_high_freq * _time) + np.sin(2 * np.pi * high_freq * _time) 36 | 37 | self._createFeature(audio, writer) 38 | 39 | count, total = self._notifyIfNeeded(count + 1, total) 40 | sys.stdout.flush() 41 | writer.close() 42 | end = time.time() - start 43 | 44 | print("there were: ", total + count) 45 | print("wow, that took", end, "seconds... might want to change that to mins :)") 46 | 47 | 48 | def _filenameShouldBeLoaded(self, filename): 49 | raise NotImplementedError("We fake bro") 50 | -------------------------------------------------------------------------------- /LPC-based extrapolation/lpcInFolder.m: -------------------------------------------------------------------------------- 1 | contextLength = 2048; 2 | targetLength = 1024; 3 | contextRatio = ceil(contextLength/targetLength); 4 | maxLag = 1000; 5 | 6 | folder = 'fma'; 7 | extension = 'mp3'; 8 | audiofiles = dir(strcat(folder, '/*', extension)); 9 | allSNR = []; 10 | 11 | for file = audiofiles' 12 | 13 | fprintf(1,'Inpainting %s\n', file.name) 14 | [audio, Fs]=audioread(strcat(folder, '/', file.name)); 15 | 16 | t = linspace(0, pi/2, targetLength)'; 17 | sqCos = cos(t).^2; 18 | 19 | SNR = []; 20 | 21 | for i = contextRatio:(length(audio)/targetLength)-contextRatio-2 22 | previous_sig = audio(targetLength*(i-contextRatio)+1:targetLength*(i)); 23 | target_sig = audio(targetLength*(i)+1:targetLength*(i+1)); 24 | next_sig = audio(targetLength*(i+1)+1:targetLength*(i+contextRatio+1)); 25 | 26 | if rms(target_sig) < 1e-4 27 | continue 28 | end 29 | 30 | ab = arburg(previous_sig, maxLag); 31 | Zb = filtic(1,ab,previous_sig(end-(0:(maxLag-1)))); 32 | forw_pred = filter(1,ab,zeros(1,targetLength),Zb)'; 33 | 34 | next_sig = flipud(next_sig); 35 | af = arburg(next_sig, maxLag); 36 | Zf = filtic(1,af, next_sig(end-(0:(maxLag-1)))); 37 | backw_pred = flipud(filter(1,af,zeros(1,targetLength),Zf)'); 38 | 39 | sigout = sqCos.*forw_pred + flipud(sqCos).*backw_pred; 40 | SNR(length(SNR)+1) = mySNR(target_sig, sigout); 41 | end 42 | 43 | fprintf('mean SNR is %f \n', mean(SNR)); 44 | 45 | allSNR = cat(2, SNR, allSNR); 46 | 47 | end 48 | 49 | allSNR(isnan(allSNR)) = 0; 50 | 51 | fprintf('mean SNR is %f \n', mean(allSNR)); 52 | fprintf('std SNR is %f \n', std(allSNR)); 53 | fprintf('min SNR is %f \n', min(allSNR)); 54 | fprintf('25%% percentile SNR is %f \n', prctile(allSNR, 25)); 55 | fprintf('50%% percentile SNR is %f \n', prctile(allSNR, 50)); 56 | fprintf('75%% percentile SNR is %f \n', prctile(allSNR, 75)); 57 | fprintf('max SNR is %f \n', max(allSNR)); % was max(SNR), which only covered the last file 58 | -------------------------------------------------------------------------------- /utils/tfReader.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Andres' 2 | 3 | import tensorflow as tf 4 | from tensorflow.python.framework.errors_impl import OutOfRangeError 5 | 6 | 7 | class TFReader(object): 8 | def __init__(self, path_to_tfRecord_file, window_size, batchSize, num_epochs=10, capacity=100000): 9 | self._path_to_tfRecord_file = path_to_tfRecord_file 10 | self._capacity = capacity 11 | self._batchSize = batchSize 12 | self._window_size = window_size 13 | self._audios = self._read_and_decode(tf.train.string_input_producer([path_to_tfRecord_file], 14 | num_epochs=num_epochs)) 15 | 16 | def start(self): 17 | self._coordinator = tf.train.Coordinator() 18 | self._threads = tf.train.start_queue_runners(coord=self._coordinator) 19 | 20 | def dataOperation(self, session): 21 | try: 22 | audios = session.run(self._audios) 23 | 
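# session.run dequeues one shuffled batch from the TFRecord queue; once the
# string_input_producer has yielded num_epochs passes over the file, the queue
# closes and run() raises OutOfRangeError, surfaced below as StopIteration.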
return audios 24 | except OutOfRangeError: 25 | raise StopIteration 26 | 27 | def finish(self): 28 | self._coordinator.request_stop() 29 | self._coordinator.join(self._threads) 30 | 31 | def _read_and_decode(self, filename_queue): 32 | reader = tf.TFRecordReader() 33 | _, serialized_example = reader.read(filename_queue) 34 | features = tf.parse_single_example(serialized_example, 35 | features={'valid/windows': tf.FixedLenFeature([], tf.string)}) 36 | 37 | windows = tf.decode_raw(features['valid/windows'], tf.float32) 38 | windows = tf.reshape(windows, [self._window_size]) 39 | 40 | audios = tf.train.shuffle_batch([windows], batch_size=self._batchSize, 41 | min_after_dequeue=int(self._capacity * 0.5), 42 | capacity=self._capacity, 43 | num_threads=4) 44 | return audios 45 | -------------------------------------------------------------------------------- /utils/saveParameters.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import sys 4 | 5 | from architecture.parameters.contextEncoderParameters import ContextEncoderParameters 6 | 7 | sys.path.append('.') # In case we launch this from the base folder 8 | 9 | __author__ = 'Andres' 10 | 11 | from architecture.parameters.convNetworkParams import ConvNetworkParams 12 | from architecture.parameters.fullyLayerParams import FullyLayerParams 13 | 14 | "Simple script to save parameters" 15 | 16 | architecturesParametersFile = "magnitude_network_parameters.pkl" 17 | 18 | batchSize = 256 19 | signalLength = 5120 20 | gapLength = 1024 21 | fftWindowLength = 512 22 | fftHopSize = 128 23 | 24 | encoderParams = ConvNetworkParams(filterShapes=[(7, 89), (3, 17), (2, 11), 25 | (1, 9), (1, 5), (2, 5)], 26 | channels=[4, 32, 128, 512, 27 | 256, 160, 128], 28 | strides=[[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], 29 | [1, 1, 2, 1], [1, 1, 1, 1], [1, 1, 1, 1]], 30 | name='Encoder') 31 | 32 | fullyParams = FullyLayerParams(inputShape=(batchSize, 128, 2, 8), outputShape=(batchSize, 8, 8, 32), name="Fully") 33 | 34 | decoderParams = ConvNetworkParams(filterShapes=[(8, 8), (5, 5), (3, 3), (5, 67), (11, 257)], 35 | channels=[32, 128, 512, 257, 11, 1], 36 | strides=[[1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 1, 1], 37 | [1, 2, 2, 1], [1, 1, 1, 1]], 38 | name='Decoder') 39 | 40 | contextEncoderParameters = ContextEncoderParameters(batchSize, signalLength, gapLength, fftWindowLength, fftHopSize, 41 | encoderParams, fullyParams, decoderParams) 42 | 43 | with open(architecturesParametersFile, 'wb') as fiModel: 44 | pickle.dump(contextEncoderParameters, fiModel) 45 | -------------------------------------------------------------------------------- /datasetGenerator/downloader.py: -------------------------------------------------------------------------------- 1 | import ssl 2 | import urllib.request 3 | import tarfile 4 | import zipfile 5 | import os 6 | 7 | __author__ = 'Andres' 8 | 9 | 10 | class Downloader(object): 11 | def downloadAndExtract(self): 12 | for link, filename in self._downloadLinksAndFilenames(): 13 | self._download(link, filename) 14 | self._extractCompressedFile(filename) 15 | self._deleteCompressedFile(filename) 16 | self._divideDataIntoTrainValidAndTestSubsets() 17 | 18 | def _download(self, aLink, toAFilename): 19 | print("Downloading to ", toAFilename) 20 | size = 0 21 | blocksize = 4096 22 | 23 | with urllib.request.urlopen(aLink, context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) as response, \ 24 | open(toAFilename, 'wb') as out_file: # context avoids SSL certifications 25 | length = 
float(response.getheader('content-length')) 26 | data = response.read(blocksize) 27 | out_file.write(data) 28 | while data: 29 | size += len(data) 30 | print('\r Downloaded {:.2f} % '.format(100 * size / length), end='') 31 | data = response.read(blocksize) 32 | out_file.write(data) 33 | print('') 34 | 35 | def _deleteCompressedFile(self, filename): 36 | print('Deleting', filename) 37 | os.remove(filename) 38 | 39 | def _extractTar(self, aFile): 40 | print('Extracting', aFile) 41 | tar = tarfile.open(aFile) 42 | tar.extractall() 43 | tar.close() 44 | 45 | def _extractZip(self, aFile): 46 | print('Extracting', aFile) 47 | zip_ref = zipfile.ZipFile(aFile, 'r') 48 | zip_ref.extractall() 49 | zip_ref.close() 50 | 51 | def _extractCompressedFile(self, filename): 52 | raise NotImplementedError("Subclass Responsibility") 53 | 54 | def _downloadLinksAndFilenames(self): 55 | raise NotImplementedError("Subclass Responsibility") 56 | 57 | def _divideDataIntoTrainValidAndTestSubsets(self): 58 | raise NotImplementedError("Subclass Responsibility") 59 | -------------------------------------------------------------------------------- /utils/legacy/stftPhaseContextEncoder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 5 | 6 | __author__ = 'Andres' 7 | 8 | 9 | class StftPhaseContextEncoder(StftGapContextEncoder): 10 | def _loss_graph(self): 11 | with tf.variable_scope("Loss"): 12 | gap_stft = self._target_model.output() 13 | 14 | abs_stft = tf.reshape(gap_stft[:, :, :, 0], (self._batch_size, 11, 257, 1)) 15 | target_angle = abs_stft * tf.reshape(gap_stft[:, :, :, 1], (self._batch_size, 11, 257, 1)) 16 | 17 | norm_orig = self._squaredEuclideanNorm(target_angle, onAxis=[1, 2, 3]) 18 | norm_orig_summary = tf.summary.scalar("norm_orig", tf.reduce_min(norm_orig)) 19 | 20 | error = target_angle - (self._reconstructed_input_data * abs_stft) 21 | error_per_example = tf.reduce_sum(tf.square(error), axis=[1, 2, 3]) 22 | 23 | reconstruction_loss = 0.5 * tf.reduce_sum(error_per_example * (1 + 5 / (norm_orig + 1e-2))) 24 | 25 | rec_loss_summary = tf.summary.scalar("reconstruction_loss", reconstruction_loss) 26 | 27 | trainable_vars = tf.trainable_variables() 28 | lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars if 'bias' not in v.name]) * 1e-2 29 | l2_loss_summary = tf.summary.scalar("lossL2", lossL2) 30 | 31 | total_loss = tf.add_n([reconstruction_loss, lossL2]) 32 | total_loss_summary = tf.summary.scalar("total_loss", total_loss) 33 | 34 | self._lossSummaries = tf.summary.merge( 35 | [rec_loss_summary, l2_loss_summary, norm_orig_summary, total_loss_summary]) 36 | 37 | return total_loss 38 | 39 | def _evaluateValidSNR(self, summaries_dict, validReader, evalWriter, writer, sess, step): 40 | reconstructed, out_gaps = self._reconstruct(sess, validReader, max_steps=8) 41 | reconstructed = np.reshape(reconstructed, (self._batch_size*8, 11, 257, 1)) 42 | step_valid_SNR = evalWriter.evaluateImages(reconstructed, np.reshape(out_gaps[:, :, :, 1], (self._batch_size*8, 11, 257, 1)), self._initial_model_num + step) 43 | validSNRSummaryToWrite = sess.run(summaries_dict['valid_SNR_summary'], 44 | feed_dict={summaries_dict['valid_SNR']: step_valid_SNR}) 45 | writer.add_summary(validSNRSummaryToWrite, self._initial_model_num + step) -------------------------------------------------------------------------------- 
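A minimal NumPy sketch of the weighting used in _loss_graph above: the phase error is scaled by the magnitude, so loud time-frequency bins dominate, and examples whose target gap is quiet are up-weighted by the 1 + 5/(||target||^2 + 1e-2) factor. Shapes are assumed to match the (batch, 11, 257, 1) tensors above, and the L2 weight penalty is omitted; this is an illustration, not the training code:

import numpy as np

def magnitude_weighted_phase_loss(mag, target_phase, predicted_phase, eps=1e-2):
    target = mag * target_phase                # magnitude-weighted target angle
    error = target - mag * predicted_phase     # magnitude-weighted phase error
    error_per_example = np.sum(np.square(error), axis=(1, 2, 3))
    norm_orig = np.sum(np.square(target), axis=(1, 2, 3))   # ||target||^2 per example
    return 0.5 * np.sum(error_per_example * (1 + 5 / (norm_orig + eps)))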
/utils/legacy/simulations/stft_istft_tfReconstructionTest.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import time 7 | from tensorflow.contrib.signal.python.ops import window_ops 8 | 9 | __author__ = 'Andres' 10 | 11 | 12 | s = tf.Session() 13 | 14 | sampling_rate = 44000 15 | freq = 210 16 | countOfCycles = 4 17 | _time = tf.range(0, 1024 / sampling_rate, 1 / sampling_rate, dtype=tf.float32) 18 | firstSignal = tf.sin(2 * 3.14159 * freq * _time) 19 | 20 | fft_frame_length = 512 21 | fft_frame_step = 128 22 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 23 | inverse_window = tf.contrib.signal.inverse_stft_window_fn(fft_frame_step, 24 | forward_window_fn=window_fn) 25 | 26 | firstSignal = tf.concat([tf.zeros(fft_frame_length-fft_frame_step), firstSignal, tf.zeros(fft_frame_length-fft_frame_step)], axis=0) 27 | s.run(tf.initialize_all_variables()) 28 | stft = tf.contrib.signal.stft(signals=firstSignal, frame_length=fft_frame_length, frame_step=fft_frame_step, 29 | fft_length=fft_frame_length, window_fn=window_fn) 30 | istft = tf.contrib.signal.inverse_stft(stfts=stft, frame_length=fft_frame_length, frame_step=fft_frame_step, 31 | window_fn=inverse_window) 32 | 33 | stft_times = [] 34 | istft_times = [] 35 | for x in range(1): 36 | t = time.time() 37 | s.run(stft) 38 | stft_times.append(time.time()-t) 39 | print('stft took:', stft_times[-1]) 40 | t = time.time() 41 | s.run(istft) 42 | istft_times.append(time.time()-t) 43 | print('istft took:', istft_times[-1]) 44 | 45 | print(stft_times) 46 | print(istft_times) 47 | print(np.mean(stft_times)) 48 | print(np.mean(istft_times)) 49 | 50 | 51 | with tf.Session() as sess: 52 | t, original, stft_t, reconstructed = sess.run([_time, firstSignal, stft, istft]) 53 | 54 | def _pavlovs_SNR(y_orig, y_inp): 55 | norm_y_orig = np.linalg.norm(y_orig) + 1e-10 56 | norm_y_orig_minus_y_inp = np.linalg.norm(y_orig - y_inp) 57 | return 10 * np.log10((abs(norm_y_orig ** 2)) / abs((norm_y_orig_minus_y_inp ** 2))) 58 | 59 | print(_pavlovs_SNR(original, reconstructed)) 60 | 61 | ax1 = plt.subplot(211) 62 | plt.plot(original) 63 | plt.plot(reconstructed) 64 | plt.subplot(212) 65 | print(np.transpose(np.abs(stft_t)).shape) 66 | plt.pcolormesh(np.transpose(np.abs(stft_t))) 67 | plt.show() 68 | 69 | -------------------------------------------------------------------------------- /utils/legacy/evaluationWriter.py: -------------------------------------------------------------------------------- 1 | # import pandas as pd 2 | import numpy as np 3 | 4 | __author__ = 'Andres' 5 | 6 | 7 | class EvaluationWriter(object): 8 | def __init__(self, excelFileName): 9 | # self._writer = pd.ExcelWriter(excelFileName) 10 | self._index = 0 11 | 12 | def evaluate(self, reconstructed, original_gaps, step): 13 | assert (len(original_gaps) == len(reconstructed)) 14 | 15 | SNRs = self._pavlovs_SNR(original_gaps, reconstructed) 16 | 17 | norm_orig = self._squaredEuclideanNorm(original_gaps) / 5 18 | error = original_gaps - reconstructed 19 | reconstruction_loss = 0.5 * np.sum(np.square(error), axis=1) * (1 + 1 / norm_orig) 20 | 21 | # df = pd.DataFrame({'SNRs ' + str(step): SNRs, 'reconstruction_loss ' + str(step): reconstruction_loss}) 22 | # df.describe().to_excel(self._writer, sheet_name='general', startcol=self._index, index=not self._index) 23 | self._index += 3 24 | return np.mean(SNRs) 25 | 26 | def 
evaluateImages(self, reconstructed, original_gaps, step): 27 | print('original_gaps:', original_gaps.shape) 28 | print('reconstructed:', reconstructed.shape) 29 | assert (original_gaps.shape == reconstructed.shape) 30 | 31 | SNRs = self._pavlovs_SNR(original_gaps, reconstructed, onAxis=(1, 2, 3)) 32 | 33 | # norm_orig = self._squaredEuclideanNorm(original_gaps, onAxis=(1, 2, 3)) / 5 34 | # error = original_gaps - reconstructed 35 | # reconstruction_loss = 0.5 * np.sum(np.square(error), axis=(1, 2, 3)) * (1 + 1 / norm_orig) 36 | # 37 | # # df = pd.DataFrame({'SNRs ' + str(step): SNRs, 'reconstruction_loss ' + str(step): reconstruction_loss}) 38 | # # df.describe().to_excel(self._writer, sheet_name='general', startcol=self._index, index=not self._index) 39 | # self._index += 3 40 | return np.mean(SNRs) 41 | 42 | def _pavlovs_SNR(self, y_orig, y_inp, onAxis=(1,)): 43 | norm_y_orig = self._squaredEuclideanNorm(y_orig, onAxis) 44 | norm_y_orig_minus_y_inp = self._squaredEuclideanNorm(y_orig - y_inp, onAxis) 45 | return 10 * np.log10(norm_y_orig / norm_y_orig_minus_y_inp) 46 | 47 | def _squaredEuclideanNorm(self, vector, onAxis=(1,)): 48 | squared = np.square(vector) 49 | print('squared:', squared.shape) 50 | summed = np.sum(squared, axis=onAxis) 51 | return summed 52 | 53 | def save(self): 54 | pass 55 | # self._writer.save() 56 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatBigger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from network.emptyTFGraph import EmptyTfGraph 4 | from utils.legacy.contextEncoder import ContextEncoderNetwork 5 | 6 | __author__ = 'Andres' 7 | 8 | tf.reset_default_graph() 9 | train_filename = 'train_full_w5120_g1024_h512_ex18978619.tfrecords' 10 | valid_filename = 'valid_full_w5120_g1024_h512_ex893971.tfrecords' 11 | 12 | window_size = 5120 13 | gap_length = 1024 14 | batch_size = 256 15 | 16 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 17 | 18 | dataset = aModel.output() 19 | first_half = dataset[:, :(window_size - gap_length) // 2] 20 | second_half = dataset[:, (window_size - gap_length) // 2:] 21 | stacked_halfs = tf.stack([first_half, second_half], axis=2) 22 | aModel.setOutputTo(stacked_halfs) 23 | 24 | with tf.variable_scope("Encoder"): 25 | aModel.addReshape((batch_size, 1, (window_size - gap_length) // 2, 2)) 26 | filter_widths = [129, 65, 17, 9] 27 | input_channels = [2, 32, 64, 128] 28 | output_channels = [32, 64, 128, 256] 29 | strides = [[1, 1, 4, 1]] * len(input_channels) 30 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv'] 31 | aModel.addSeveralConvLayers(filter_widths=filter_widths, input_channels=input_channels, 32 | output_channels=output_channels, strides=strides, names=names) 33 | 34 | aModel.addReshape((batch_size, 2048)) 35 | aModel.addFullyConnectedLayer(2048, 2048, 'Fully') 36 | aModel.addRelu() 37 | aModel.addReshape((batch_size, 1, 8, 256)) 38 | 39 | 40 | with tf.variable_scope("Decoder"): 41 | filter_widths = [9, 17, 65] 42 | input_channels = [256, 128, 64] 43 | output_channels = [128, 64, 16] 44 | strides = [[1, 1, 4, 1]] * len(input_channels) 45 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv'] 46 | aModel.addSeveralDeconvLayers(filter_widths=filter_widths, input_channels=input_channels, 47 | output_channels=output_channels, strides=strides, names=names) 48 | aModel.addDeconvLayerWithoutNonLin(filter_width=129, 
input_channels=16, output_channels=1, 49 | stride=(1, 1, 2, 1), name="Last_Deconv") 50 | aModel.addReshape((batch_size, gap_length)) 51 | 52 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 53 | gap_length=gap_length, learning_rate=1e-5, name='nat_bigger') 54 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6, restore_num=474000) 55 | -------------------------------------------------------------------------------- /datasetGenerator/exampleProcessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import librosa 3 | 4 | __author__ = 'Andres' 5 | 6 | 7 | class ExampleProcessor(object): 8 | def __init__(self, gapLength=1024, sideLength=2048, hopSize=512, gapMinRMS=1e-3): 9 | self._sideLength = sideLength 10 | self._gapLength = gapLength 11 | self._totalLength = gapLength + 2*sideLength 12 | self._hopSize = hopSize 13 | self._gapMinRMS = gapMinRMS 14 | 15 | def gapLength(self): 16 | return self._gapLength 17 | 18 | def sideLength(self): 19 | return self._sideLength 20 | 21 | def describe(self): 22 | return "_w" + str(self._totalLength) + '_g' + str(self._gapLength) + '_h' + str(self._hopSize) 23 | 24 | def process(self, audio_signal): 25 | audio_without_silence_at_beginning_and_end = self._trim_silence(audio_signal, frame_length=self._gapLength) 26 | windowed_audio = self._window(audio_without_silence_at_beginning_and_end) 27 | processed_windows = self._remove_examples_with_low_energy_in_gap(windowed_audio) 28 | return processed_windows 29 | 30 | def _trim_silence(self, audio, frame_length=1024): 31 | if audio.size < frame_length: 32 | frame_length = audio.size 33 | energy = librosa.feature.rmse(audio, frame_length=frame_length) 34 | frames = np.nonzero(energy > self._gapMinRMS * 10) 35 | indices = librosa.core.frames_to_samples(frames)[1] 36 | 37 | # Note: indices can be an empty array, if the whole audio was silence. 
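# The threshold keeps frames whose RMS exceeds 10x the minimum gap RMS; frames_to_samples
# maps those frames back to sample positions, so the slice below trims leading and trailing
# silence while leaving quiet stretches in the middle of the signal untouched.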
38 | return audio[indices[0]:indices[-1]] if indices.size else audio[0:0] 39 | 40 | def _window(self, audio_signal): 41 | window_count = int((len(audio_signal) - self._totalLength) / self._hopSize) 42 | 43 | windowed_audios = np.array([]) 44 | for window_index in range(int(window_count)): 45 | initial_index = int(window_index * self._hopSize) 46 | windowed_audios = np.append(windowed_audios, audio_signal[initial_index:initial_index + self._totalLength]) 47 | windowed_audios = np.reshape(windowed_audios, (-1, self._totalLength)) 48 | return windowed_audios 49 | 50 | def _remove_examples_with_low_energy_in_gap(self, windows): 51 | begin = int(np.floor((self._totalLength - self._gapLength) / 2)) 52 | end = int(np.floor((self._totalLength + self._gapLength) / 2)) 53 | gaps = windows[:, begin:end] 54 | 55 | mask = np.where(np.sum(np.abs(gaps), axis=1) < self._gapLength * self._gapMinRMS) 56 | processed_windows = np.delete(windows, mask, axis=0) 57 | 58 | return processed_windows 59 | 60 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatNatBigger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from network.emptyTFGraph import EmptyTfGraph 4 | from utils.legacy.contextEncoder import ContextEncoderNetwork 5 | 6 | __author__ = 'Andres' 7 | 8 | tf.reset_default_graph() 9 | train_filename = 'train_full_w5120_g1024_h512_ex18978619.tfrecords' 10 | valid_filename = 'valid_full_w5120_g1024_h512_ex893971.tfrecords' 11 | 12 | window_size = 5120 13 | gap_length = 1024 14 | batch_size = 256 15 | 16 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 17 | 18 | dataset = aModel.output() 19 | first_half = dataset[:, :(window_size - gap_length) // 2] 20 | second_half = dataset[:, (window_size - gap_length) // 2:] 21 | stacked_halfs = tf.stack([first_half, second_half], axis=2) 22 | aModel.setOutputTo(stacked_halfs) 23 | 24 | with tf.variable_scope("Encoder"): 25 | aModel.addReshape((batch_size, 1, (window_size - gap_length) // 2, 2)) 26 | filter_shapes = [(1, 129), (1, 65), (1, 33), (1, 17), (1, 17), (1, 17)] 27 | input_channels = [2, 32, 128, 512, 256, 128] 28 | output_channels = [*input_channels[1:], 64] 29 | strides = [[1, 1, 2, 1]] * len(input_channels) 30 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv', 'Six_Conv'] 31 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 32 | output_channels=output_channels, strides=strides, names=names) 33 | 34 | aModel.addReshape((batch_size, 2048)) 35 | aModel.addFullyConnectedLayer(2048, 2048, 'Fully') 36 | aModel.addRelu() 37 | aModel.addReshape((batch_size, 1, 32, 64)) 38 | 39 | with tf.variable_scope("Decoder"): 40 | filter_shapes = [(1, 17), (1, 17), (1, 33), (1, 65), (1, 65)] 41 | input_channels = [64, 128, 512, 256, 128] 42 | output_channels = [*input_channels[1:], 16] 43 | strides = [[1, 1, 2, 1]] * len(input_channels) 44 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv', 'Fourth_Deconv', 'Fifth_Deconv'] 45 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 46 | output_channels=output_channels, strides=strides, names=names) 47 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(1, 129), input_channels=16, output_channels=1, 48 | stride=(1, 1, 1, 1), name="Last_Deconv") 49 | aModel.addReshape((batch_size, gap_length)) 50 | 51 | aContextEncoderNetwork = 
ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 52 | gap_length=gap_length, learning_rate=1e-5, name='nat_sec_bigg') 53 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6, restore_num=564425, per_process_gpu_memory_fraction=0.9) 54 | 55 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftTest.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from network.emptyTFGraph import EmptyTfGraph 4 | from utils.legacy.stftMagContextEncoder import StftTestContextEncoder 5 | 6 | __author__ = 'Andres' 7 | 8 | tf.reset_default_graph() 9 | train_filename = 'test_full_w5120_g1024_h512_ex292266.tfrecords' 10 | valid_filename = 'test_full_w5120_g1024_h512_ex292266.tfrecords' 11 | 12 | window_size = 5120 13 | gap_length = 1024 14 | batch_size = 256 15 | 16 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 17 | 18 | dataset = aModel.output() 19 | signal_length = window_size - gap_length 20 | first_half = dataset[:, :signal_length // 2] 21 | second_half = dataset[:, signal_length // 2:] 22 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 23 | 24 | with tf.name_scope('Energy_Spectogram'): 25 | fft_frame_length = 512 26 | fft_frame_step = 128 27 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step) 28 | mag_stft = tf.abs(stft) # (256, 2, 13, 257) 29 | mag_stft = tf.reshape(mag_stft, (batch_size, 13, 257, 2)) 30 | aModel.setOutputTo(mag_stft) 31 | 32 | with tf.variable_scope("Encoder"): 33 | filter_widths = [(3, 33), (2, 9), (1, 3)] 34 | input_channels = [2, 32, 64] 35 | output_channels = [32, 64, 128] 36 | strides = [[1, 2, 4, 1], [1, 2, 4, 1], [1, 2, 4, 1]] 37 | names = ['First_Conv', 'Second_Conv', 'Third_Conv'] 38 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 39 | output_channels=output_channels, strides=strides, names=names) 40 | 41 | aModel.addReshape((batch_size, 1280)) 42 | aModel.addFullyConnectedLayer(1280, 896, 'Fully') 43 | aModel.addRelu() 44 | aModel.addReshape((batch_size, 1, 7, 128)) 45 | 46 | with tf.variable_scope("Decoder"): 47 | filter_widths = [(1, 5), (1, 9)] 48 | input_channels = [128, 256] 49 | output_channels = [256, 128] 50 | strides = [[1, 1, 2, 1]] * len(input_channels) 51 | names = ['First_Deconv', 'Second_Deconv'] 52 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 53 | output_channels=output_channels, strides=strides, names=names) 54 | aModel.addReshape((batch_size, 1, 7, 512)) 55 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(1, 3), input_channels=512, output_channels=257, 56 | stride=(1, 1, 1, 1), name="Last_Deconv") 57 | aModel.addReshape((batch_size, 7, 257)) 58 | 59 | aContextEncoderNetwork = StftTestContextEncoder(model=aModel, batch_size=batch_size, window_size=window_size, 60 | gap_length=gap_length, learning_rate=1e-4, name='nat_mag_stft_2') 61 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 62 | -------------------------------------------------------------------------------- /utils/legacy/notebooks/train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": false 8 | }, 9 | "outputs": [], 10 | 
"source": [ 11 | "import tensorflow as tf\n", 12 | "from network.natContextEncoder import ContextEncoderNetwork" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "---------\n", 25 | "ContextEncoder\n", 26 | "---------\n", 27 | "Tensor(\"stack:0\", shape=(256, 2048, 2), dtype=float32)\n", 28 | "Tensor(\"Encoder/Reshape:0\", shape=(256, 1, 2048, 2), dtype=float32)\n", 29 | "Tensor(\"Encoder/First_Conv/Relu:0\", shape=(256, 1, 512, 32), dtype=float32)\n", 30 | "Tensor(\"Encoder/Second_Conv/Relu:0\", shape=(256, 1, 128, 64), dtype=float32)\n", 31 | "Tensor(\"Encoder/Third_Conv/Relu:0\", shape=(256, 1, 32, 64), dtype=float32)\n", 32 | "Tensor(\"Reshape:0\", shape=(256, 2048), dtype=float32)\n", 33 | "Tensor(\"Fully/add:0\", shape=(256, 2048), dtype=float32)\n", 34 | "Tensor(\"Reshape_1:0\", shape=(256, 1, 32, 64), dtype=float32)\n", 35 | "Tensor(\"Decoder/First_Deconv/Relu:0\", shape=(256, 1, 128, 64), dtype=float32)\n", 36 | "Tensor(\"Decoder/Second_Deconv/Relu:0\", shape=(256, 1, 512, 16), dtype=float32)\n", 37 | "Tensor(\"Decoder/Last_Deconv/Relu:0\", shape=(256, 1, 1024, 1), dtype=float32)\n", 38 | "Tensor(\"Decoder/Reshape:0\", shape=(256, 1024), dtype=float32)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "tf.reset_default_graph()\n", 44 | "\n", 45 | "train_filename = 'test_w5120_g1024_h512_ex63501.tfrecords'\n", 46 | "valid_filename = 'test_w5120_g1024_h512_ex63501.tfrecords'\n", 47 | "\n", 48 | "aContextEncoderNetwork = ContextEncoderNetwork(batch_size=256, window_size=5120, gap_length=1024, \n", 49 | " learning_rate=1e-5, name='train')\n", 50 | "# aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "aContextEncoderNetwork.description()" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 3", 66 | "language": "python", 67 | "name": "python3" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 3 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython3", 79 | "version": "3.6.2" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /utils/legacy/timeLiner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import json 4 | 5 | import tensorflow as tf 6 | from tensorflow.contrib.layers import fully_connected as fc 7 | from tensorflow.examples.tutorials.mnist import input_data 8 | from tensorflow.python.client import timeline 9 | 10 | 11 | class TimeLiner: 12 | _timeline_dict = None 13 | 14 | def update_timeline(self, chrome_trace): 15 | # convert crome trace to python dict 16 | chrome_trace_dict = json.loads(chrome_trace) 17 | # for first run store full trace 18 | if self._timeline_dict is None: 19 | self._timeline_dict = chrome_trace_dict 20 | # for other - update only time consumption, not definitions 21 | else: 22 | for event in chrome_trace_dict['traceEvents']: 23 | # events time consumption started with 'ts' prefix 24 | if 'ts' in event: 25 | self._timeline_dict['traceEvents'].append(event) 26 | 27 | def save(self, f_name): 
28 | with open(f_name, 'w') as f: 29 | json.dump(self._timeline_dict, f) 30 | 31 | 32 | batch_size = 100 33 | 34 | inputs = tf.placeholder(tf.float32, [batch_size, 784]) 35 | targets = tf.placeholder(tf.float32, [batch_size, 10]) 36 | 37 | with tf.variable_scope("layer_1"): 38 | fc_1_out = fc(inputs, num_outputs=500, activation_fn=tf.nn.sigmoid) 39 | with tf.variable_scope("layer_2"): 40 | fc_2_out = fc(fc_1_out, num_outputs=784, activation_fn=tf.nn.sigmoid) 41 | with tf.variable_scope("layer_3"): 42 | logits = fc(fc_2_out, num_outputs=10) 43 | 44 | loss = tf.reduce_mean( 45 | tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets)) 46 | train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss) 47 | 48 | if __name__ == '__main__': 49 | mnist_save_dir = os.path.join(tempfile.gettempdir(), 'MNIST_data') 50 | mnist = input_data.read_data_sets(mnist_save_dir, one_hot=True) 51 | 52 | config = tf.ConfigProto() 53 | config.gpu_options.allow_growth = True 54 | with tf.Session(config=config) as sess: 55 | sess.run(tf.global_variables_initializer()) 56 | 57 | options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 58 | run_metadata = tf.RunMetadata() 59 | many_runs_timeline = TimeLiner() 60 | runs = 5 61 | for i in range(runs): 62 | batch_input, batch_target = mnist.train.next_batch(batch_size) 63 | feed_dict = {inputs: batch_input, 64 | targets: batch_target} 65 | 66 | sess.run(train_op, 67 | feed_dict=feed_dict, 68 | options=options, 69 | run_metadata=run_metadata) 70 | 71 | fetched_timeline = timeline.Timeline(run_metadata.step_stats) 72 | chrome_trace = fetched_timeline.generate_chrome_trace_format() 73 | many_runs_timeline.update_timeline(chrome_trace) 74 | many_runs_timeline.save('timeline_03_merged_%d_runs.json' % runs) 75 | -------------------------------------------------------------------------------- /datasetGenerator/tfRecordGenerator.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | import tensorflow as tf 4 | import time 5 | import os 6 | import sys 7 | 8 | from audioread import NoBackendError 9 | 10 | __author__ = 'Andres' 11 | 12 | 13 | class TFRecordGenerator(object): 14 | """To generate a Dataset, instantiate this class with its arguments and call generateDataset()""" 15 | 16 | def __init__(self, baseName, pathToDataFolder, exampleProcessor, targetSamplingRate=16000, notifyEvery=10000): 17 | self._pathToDataFolder = pathToDataFolder 18 | self._exampleProcessor = exampleProcessor 19 | self._notifyEvery = notifyEvery 20 | self._targetSamplingRate = targetSamplingRate 21 | self._baseName = baseName 22 | 23 | def name(self): 24 | return self._baseName + self._exampleProcessor.describe() 25 | 26 | def generateDataset(self): 27 | start = time.time() 28 | 29 | train_filename = self.name() + '.tfrecords' 30 | writer = tf.python_io.TFRecordWriter(train_filename) 31 | 32 | print("start:", start) 33 | count = 0 34 | total = 0 35 | 36 | for file_name in os.listdir(self._pathToDataFolder): 37 | if self._filenameShouldBeLoaded(file_name): 38 | try: 39 | audio, sr = librosa.load(self._pathToDataFolder + '/' + file_name, sr=self._targetSamplingRate) 40 | except NoBackendError: 41 | print("No backend for file:", file_name) 42 | continue 43 | 44 | windows = self._exampleProcessor.process(audio) 45 | if windows.shape[0] == 0: 46 | print("Got a completely silenced signal! 
with path:", file_name) 47 | continue 48 | 49 | for window in windows: 50 | self._createFeature(window, writer) 51 | 52 | count, total = self._notifyIfNeeded(count + len(windows), total) 53 | sys.stdout.flush() 54 | writer.close() 55 | end = time.time() - start 56 | 57 | print("there were: ", total + count) 58 | print("wow, that took", end, "seconds... might want to change that to mins :)") 59 | 60 | def _createFeature(self, window, writer): 61 | window_bytes = window.astype(np.float32).tostring() 62 | 63 | example = tf.train.Example(features=tf.train.Features(feature={ 64 | 'valid/windows': self._bytes_feature(window_bytes)})) 65 | 66 | writer.write(example.SerializeToString()) 67 | 68 | def _bytes_feature(self, value): 69 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 70 | 71 | def _filenameShouldBeLoaded(self, filename): 72 | raise NotImplementedError("Subclass Responsibility") 73 | 74 | def _notifyIfNeeded(self, count, total): 75 | if count > self._notifyEvery: 76 | count -= self._notifyEvery 77 | total += self._notifyEvery 78 | print(self._notifyEvery, "plus!", time.time()) 79 | return count, total 80 | return count, total 81 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftMagnitudeTest.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import slim 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | from utils.legacy.stftMagContextEncoder import StftTestContextEncoder 6 | 7 | __author__ = 'Andres' 8 | 9 | tf.reset_default_graph() 10 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 11 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 12 | 13 | window_size = 5120 14 | gap_length = 1024 15 | batch_size = 256 16 | 17 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 18 | 19 | signal = aModel.output() 20 | 21 | with tf.name_scope('Energy_Spectogram'): 22 | fft_frame_length = 512 23 | fft_frame_step = 128 24 | stft = tf.contrib.signal.stft(signals=signal, frame_length=fft_frame_length, frame_step=fft_frame_step) 25 | 26 | sides_stft = tf.stack((stft[:, :15, :], stft[:, 15+7:, :]), axis=3) 27 | 28 | mag_stft = tf.abs(sides_stft) # (256, 15, 257, 2) 29 | aModel.setOutputTo(mag_stft) 30 | 31 | with tf.variable_scope("Encoder"): 32 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 33 | input_channels = [2, 32, 64, 128, 128] 34 | output_channels = [32, 64, 128, 128, 200] 35 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 36 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 37 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 38 | output_channels=output_channels, strides=strides, names=names) 39 | 40 | aModel.addReshape((batch_size, 3200)) 41 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 42 | aModel.addRelu() 43 | aModel.addReshape((batch_size, 8, 8, 32)) 44 | 45 | with tf.variable_scope("Decoder"): 46 | filter_shapes = [(5, 5), (3, 3)] 47 | input_channels = [32, 64] 48 | output_channels = [64, 257] 49 | strides = [[1, 2, 2, 1]] * len(input_channels) 50 | names = ['First_Deconv', 'Second_Deconv'] 51 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 52 | output_channels=output_channels, strides=strides, names=names) 53 | 54 | aModel.addReshape((batch_size, 8, 257, 128)) 55 | 
aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=7, stride=(1, 2, 2, 1), 56 | name='Third_deconv') 57 | 58 | aModel.addReshape((batch_size, 7, 257, 32)) 59 | 60 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=1, 61 | stride=(1, 1, 1, 1), name="Last_Deconv") 62 | aModel.addReshape((batch_size, 7, 257)) 63 | 64 | print(aModel.description()) 65 | 66 | model_vars = tf.trainable_variables() 67 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 68 | 69 | aContextEncoderNetwork = StftTestContextEncoder(model=aModel, batch_size=batch_size, stft=stft, window_size=window_size, 70 | gap_length=gap_length, learning_rate=1e-4, name='nat_mag_stft_5_') 71 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 72 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftRealImagTest.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | # from tensorflow.contrib import slim 4 | from network.emptyTFGraph import EmptyTfGraph 5 | from utils.legacy.stftRealImagContextEncoder import StftRealImagContextEncoder 6 | 7 | __author__ = 'Andres' 8 | 9 | tf.reset_default_graph() 10 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 11 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 12 | 13 | window_size = 5120 14 | gap_length = 1024 15 | batch_size = 256 16 | 17 | fft_frame_length = 512 18 | fft_frame_step = 128 19 | 20 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 21 | 22 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 23 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 37, 257, 2) 24 | 25 | with tf.name_scope('Remove_gap_stft'): 26 | stft = aModel.output() 27 | sides_stft = tf.concat((stft[:, :15, :, :], stft[:, 15+7:, :, :]), axis=3) # (256, 15, 257, 4) 28 | aModel.setOutputTo(sides_stft) 29 | print(aModel.output()) 30 | 31 | with tf.variable_scope("Encoder"): 32 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 33 | input_channels = [4, 32, 64, 128, 128] 34 | output_channels = [32, 64, 128, 128, 200] 35 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 36 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 37 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 38 | output_channels=output_channels, strides=strides, names=names) 39 | 40 | aModel.addReshape((batch_size, 3200)) 41 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 42 | aModel.addRelu() 43 | aModel.addBatchNormalization() 44 | aModel.addDropout(0.3) 45 | aModel.addReshape((batch_size, 8, 8, 32)) 46 | 47 | with tf.variable_scope("Decoder"): 48 | filter_shapes = [(5, 5), (3, 3)] 49 | input_channels = [32, 64] 50 | output_channels = [64, 257] 51 | strides = [[1, 2, 2, 1]] * len(input_channels) 52 | names = ['First_Deconv', 'Second_Deconv'] 53 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 54 | output_channels=output_channels, strides=strides, names=names) 55 | 56 | aModel.addReshape((batch_size, 8, 257, 128)) 57 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=7, stride=(1, 2, 2, 1), 58 | name='Third_deconv') 59 | aModel.addBatchNormalization() 60 | aModel.addDropout(0.1) 61 | 62 | aModel.addReshape((batch_size, 7, 257, 
32)) 63 | 64 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=2, 65 | stride=(1, 1, 1, 1), name="Last_Deconv") 66 | 67 | print(aModel.description()) 68 | 69 | # model_vars = tf.trainable_variables() 70 | # slim.model_analyzer.analyze_vars(model_vars, print_info=True) 71 | 72 | aContextEncoderNetwork = StftRealImagContextEncoder(model=aModel, batch_size=batch_size, stft=stft, window_size=window_size, 73 | gap_length=gap_length, learning_rate=1e-4, name='nat_mag_real_imag_1_') 74 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 75 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftSeventh.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from network.emptyTFGraph import EmptyTfGraph 4 | from utils.legacy.contextEncoder import ContextEncoderNetwork 5 | 6 | __author__ = 'Andres' 7 | 8 | tf.reset_default_graph() 9 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 10 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 11 | 12 | window_size = 5120 13 | gap_length = 1024 14 | batch_size = 256 15 | 16 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 17 | 18 | dataset = aModel.output() 19 | signal_length = window_size - gap_length 20 | first_half = dataset[:, :signal_length // 2] 21 | second_half = dataset[:, signal_length // 2:] 22 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 23 | 24 | with tf.name_scope('Energy_Spectogram'): 25 | fft_frame_length = 512 26 | fft_frame_step = 128 27 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step) 28 | real_stft = tf.real(stft) 29 | imag_stft = tf.imag(stft) 30 | real_stft_left = real_stft[:, 0, :, :] 31 | real_stft_right = real_stft[:, 1, :, :] 32 | 33 | imag_stft_left = imag_stft[:, 0, :, :] 34 | imag_stft_right = imag_stft[:, 1, :, :] 35 | 36 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 37 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 38 | print(real_stft) 39 | 40 | stacked = tf.stack([real_stft, imag_stft], axis=3) 41 | aModel.setOutputTo(stacked) 42 | 43 | with tf.variable_scope("Encoder"): 44 | filter_widths = [(9, 97), (5, 9), (3, 3), (2, 2)] 45 | input_channels = [2, 32, 64, 128] 46 | output_channels = [32, 64, 128, 160] 47 | strides = [[1, 2, 4, 1], [1, 2, 4, 1], [1, 2, 4, 1], [1, 1, 1, 1]] 48 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv'] 49 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 50 | output_channels=output_channels, strides=strides, names=names) 51 | 52 | aModel.addReshape((batch_size, 3200)) 53 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 54 | aModel.addRelu() 55 | aModel.addReshape((batch_size, 1, 32, 64)) 56 | 57 | with tf.variable_scope("Decoder"): 58 | filter_widths = [(1, 11), (1, 3), (1, 3), (1, 11), (1, 97)] 59 | input_channels = [64, 128, 256, 128, 64] 60 | output_channels = [128, 256, 128, 64, 16] 61 | strides = [[1, 1, 2, 1]] * len(input_channels) 62 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv', 'Fourth_Deconv', 'Fifth_Deconv'] 63 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 64 | output_channels=output_channels, strides=strides, names=names) 65 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(1, 1024), 
input_channels=16, output_channels=1, 66 | stride=(1, 1, 1, 1), name="Last_Deconv") 67 | aModel.addReshape((batch_size, gap_length)) 68 | 69 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 70 | gap_length=gap_length, learning_rate=1e-5, name='nat_full_stft_8_') 71 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 72 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftEigth.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from network.emptyTFGraph import EmptyTfGraph 4 | from utils.legacy.contextEncoder import ContextEncoderNetwork 5 | 6 | __author__ = 'Andres' 7 | 8 | tf.reset_default_graph() 9 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 10 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 11 | 12 | window_size = 5120 13 | gap_length = 1024 14 | batch_size = 256 15 | 16 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 17 | 18 | dataset = aModel.output() 19 | signal_length = window_size - gap_length 20 | first_half = dataset[:, :signal_length // 2] 21 | second_half = dataset[:, signal_length // 2:] 22 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 23 | 24 | with tf.name_scope('Energy_Spectogram'): 25 | fft_frame_length = 512 26 | fft_frame_step = 128 27 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step) 28 | real_stft = tf.real(stft) 29 | imag_stft = tf.imag(stft) 30 | real_stft_left = real_stft[:, 0, :, :] 31 | real_stft_right = real_stft[:, 1, :, :] 32 | 33 | imag_stft_left = imag_stft[:, 0, :, :] 34 | imag_stft_right = imag_stft[:, 1, :, :] 35 | 36 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 37 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 38 | print(real_stft) 39 | 40 | stacked = tf.stack([real_stft, imag_stft], axis=3) 41 | aModel.setOutputTo(stacked) 42 | 43 | with tf.variable_scope("Encoder"): 44 | filter_widths = [(7, 89), (4, 23), (2, 11), (2, 3), (1, 3)] 45 | input_channels = [2, 32, 32, 128, 128] 46 | output_channels = [32, 32, 128, 128, 200] 47 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 2, 2, 1], [1, 1, 1, 1]] 48 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 49 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 50 | output_channels=output_channels, strides=strides, names=names) 51 | 52 | aModel.addReshape((batch_size, 3200)) 53 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 54 | aModel.addRelu() 55 | aModel.addReshape((batch_size, 1, 32, 64)) 56 | 57 | with tf.variable_scope("Decoder"): 58 | filter_widths = [(1, 11), (1, 3), (1, 3), (1, 11), (1, 97)] 59 | input_channels = [64, 128, 256, 128, 64] 60 | output_channels = [128, 256, 128, 64, 16] 61 | strides = [[1, 1, 2, 1]] * len(input_channels) 62 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv', 'Fourth_Deconv', 'Fifth_Deconv'] 63 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 64 | output_channels=output_channels, strides=strides, names=names) 65 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(1, 1024), input_channels=16, output_channels=1, 66 | stride=(1, 1, 1, 1), name="Last_Deconv") 67 | aModel.addReshape((batch_size, gap_length)) 68 | 69 | 
aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 70 | gap_length=gap_length, learning_rate=1e-5, name='nat_full_stft_8_') 71 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 72 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftSixth.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.signal.python.ops import window_ops 5 | 6 | from network.tfGraph import TFGraph 7 | from utils.legacy.contextEncoder import ContextEncoderNetwork 8 | 9 | __author__ = 'Andres' 10 | 11 | tf.reset_default_graph() 12 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 13 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 14 | 15 | window_size = 5120 16 | gap_length = 1024 17 | batch_size = 256 18 | 19 | aModel = TFGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 20 | 21 | dataset = aModel.output() 22 | signal_length = window_size - gap_length 23 | first_half = dataset[:, :signal_length // 2] 24 | second_half = dataset[:, signal_length // 2:] 25 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 26 | 27 | with tf.name_scope('Energy_Spectogram'): 28 | fft_frame_length = 512 29 | fft_frame_step = 128 30 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 31 | 32 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step, 33 | window_fn=window_fn) 34 | real_stft = tf.real(stft) 35 | imag_stft = tf.imag(stft) 36 | real_stft_left = real_stft[:, 0, :, :] 37 | real_stft_right = real_stft[:, 1, :, :] 38 | 39 | imag_stft_left = imag_stft[:, 0, :, :] 40 | imag_stft_right = imag_stft[:, 1, :, :] 41 | 42 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 43 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 44 | print(real_stft) 45 | 46 | stacked = tf.stack([real_stft, imag_stft], axis=3) 47 | aModel.setOutputTo(stacked) 48 | 49 | with tf.variable_scope("Encoder"): 50 | filter_widths = [(7, 89), (4, 43), (2, 11), (2, 3), (2, 5)] 51 | input_channels = [2, 16, 32, 128, 128] 52 | output_channels = [16, 32, 128, 128, 64] 53 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 1, 1]] 54 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 55 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 56 | output_channels=output_channels, strides=strides, names=names) 57 | print(aModel.output()) 58 | 59 | aModel.addReshape((batch_size, 2176)) 60 | aModel.addFullyConnectedLayer(2176, 1152, 'Fully') 61 | aModel.addRelu() 62 | aModel.addReshape((batch_size, 1, 9, 128)) 63 | 64 | with tf.variable_scope("Decoder"): 65 | filter_widths = [(1, 3), (2, 3), (2, 5)] 66 | input_channels = [128, 256, 64] 67 | output_channels = [256, 64, 128] 68 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1]] 69 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv'] 70 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 71 | output_channels=output_channels, strides=strides, names=names) 72 | aModel.addReshape((batch_size, 2, 128, 288)) 73 | 74 | aModel.addDeconvLayer(filter_shape=(1, 17), input_channels=288, output_channels=20, stride=(1, 2, 2, 1), 75 | name='first_deconv_after_reshape') 76 | 
aModel.addDeconvLayerWithoutNonLin(filter_shape=(4, 129), input_channels=20, output_channels=1, 77 | stride=(1, 1, 1, 1), name="Last_Deconv") 78 | aModel.addReshape((batch_size, gap_length)) 79 | 80 | print(aModel.description()) 81 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 82 | gap_length=gap_length, learning_rate=1e-4, name='nat_full_stft_6_') 83 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 84 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatMagPhaseGapTest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | from system.preAndPostProcessor import PreAndPostProcessor 6 | from utils.legacy.stftPhaseContextEncoder import StftPhaseContextEncoder 7 | 8 | sys.path.insert(0, '../') 9 | import tensorflow as tf 10 | from tensorflow.contrib import slim 11 | import socket 12 | if 'omenx' in socket.gethostname(): 13 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 14 | 15 | 16 | __author__ = 'Andres' 17 | 18 | tf.reset_default_graph() 19 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 20 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 21 | 22 | signal_length = 5120 23 | gap_length = 1024 24 | batch_size = 256 25 | 26 | fft_window_length = 512 27 | fft_hop_size = 128 28 | 29 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, signal_length), name="Target Model") 30 | anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=signal_length, 31 | gapLength=gap_length, 32 | fftWindowLength=fft_window_length, 33 | fftHopSize=fft_hop_size) 34 | anStftForTheInpaintingSetting.addStftForGapTo(aTargetModel) 35 | aTargetModel.divideComplexOutputIntoMagAndPhase() # (256, 11, 257, 2) 36 | 37 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, signal_length), name="context encoder") 38 | 39 | anStftForTheInpaintingSetting.addStftForTheContextTo(aModel) 40 | aModel.divideComplexOutputIntoMagAndPhase() 41 | aModel.addReshape((batch_size, 16, 257, 4)) 42 | 43 | with tf.variable_scope("Encoder"): 44 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 45 | input_channels = [4, 32, 64, 128, 128] 46 | output_channels = [32, 64, 128, 128, 200] 47 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 48 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 49 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 50 | output_channels=output_channels, strides=strides, names=names) 51 | 52 | aModel.addReshape((batch_size, 3200)) 53 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 54 | aModel.addRelu() 55 | aModel.addBatchNormalization() 56 | aModel.addReshape((batch_size, 8, 8, 32)) 57 | 58 | with tf.variable_scope("Decoder"): 59 | filter_shapes = [(5, 5), (3, 3)] 60 | input_channels = [32, 64] 61 | output_channels = [64, 257] 62 | strides = [[1, 2, 2, 1]] * len(input_channels) 63 | names = ['First_Deconv', 'Second_Deconv'] 64 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 65 | output_channels=output_channels, strides=strides, names=names) 66 | 67 | aModel.addReshape((batch_size, 8, 257, 128)) 68 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 69 | name='Third_deconv') 70 | aModel.addBatchNormalization() 
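# Assuming 'SAME' padding, the strided deconvolution above yields (256, 16, 514, 11); the
# reshape below reinterprets those 16 * 514 * 11 = 90,464 values per example as (11, 257, 32),
# moving the 11 gap frames to the leading axis so the last layer can refine them with 32 channels.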
71 | 72 | aModel.addReshape((batch_size, 11, 257, 32)) 73 | 74 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=1, 75 | stride=(1, 1, 1, 1), name="Last_Deconv") 76 | 77 | print(aModel.description()) 78 | 79 | model_vars = tf.trainable_variables() 80 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 81 | 82 | aContextEncoderNetwork = StftPhaseContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=signal_length, 83 | gap_length=gap_length, learning_rate=1e-3, name='nat_mag_phase_times_mag_gap_') 84 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 85 | -------------------------------------------------------------------------------- /utils/legacy/notebooks/try.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Context Encoder\n", 8 | "\n", 9 | "In this notebook we are going to be trying different networks to test their performance.\n", 10 | "Let's begin by importing tensorflow and the network.\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "scrolled": false 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import tensorflow as tf\n", 22 | "from network.contextEncoder import ContextEncoderNetwork" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Next, we have a modifiable version of the context encoder. The goal is to be able to easily modify the network and try other ideas." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class ModifiedContextEncoderNetwork(ContextEncoderNetwork):\n", 39 | " def _encoder(self, model, isTraining):\n", 40 | " with tf.variable_scope(\"Encoder\"):\n", 41 | " model.addReshape((self._batch_size, self._window_size - self._gap_length, 1))\n", 42 | " model.addConvLayer(filter_width=129, input_channels=1, output_channels=16,\n", 43 | " stride=4, name=\"First_Conv\", isTraining=isTraining)\n", 44 | " model.addConvLayer(filter_width=65, input_channels=16, output_channels=64,\n", 45 | " stride=4, name=\"Second_Conv\", isTraining=isTraining)\n", 46 | " model.addConvLayer(filter_width=33, input_channels=64, output_channels=256,\n", 47 | " stride=4, name=\"Third_Conv\", isTraining=isTraining)\n", 48 | " model.addConvLayer(filter_width=17, input_channels=256, output_channels=1024,\n", 49 | " stride=4, name=\"Fourth_Conv\", isTraining=isTraining)\n", 50 | " model.addConvLayer(filter_width=9, input_channels=1024, output_channels=4096,\n", 51 | " stride=4, name=\"Last_Conv\", isTraining=isTraining)\n", 52 | "\n", 53 | " def _decoder(self, model, isTraining):\n", 54 | " with tf.variable_scope(\"Decoder\"):\n", 55 | " model.addConvLayerWithoutNonLin(filter_width=5, input_channels=4096, output_channels=1024,\n", 56 | " stride=4, name=\"Decode_Conv\", isTraining=isTraining)\n", 57 | " model.addReshape((self._batch_size, self._gap_length))\n" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "tf.reset_default_graph()\n", 67 | "\n", 68 | "train_filename = 'train_full_w5120_g1024_h512_19404621.tfrecords'\n", 69 | "valid_filename = 'valid_full_w5120_g1024_h512_ex913967.tfrecords'\n", 70 | "\n", 71 | "aContextEncoderNetwork = 
ModifiedContextEncoderNetwork(batch_size=256, window_size=5120, gap_length=1024, \n", 72 | " learning_rate=1e-5, name='first_try')\n", 73 | "aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6)" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.2" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftSec.py: -------------------------------------------------------------------------------- 1 | """ 2 | This network uses an stft representation of the sides of the signal to produce audio as an output. 3 | The frame length was set to 512 and the frame step to 64, although it should be 128. 4 | There are three convolutions and three deconvolutions. 5 | 6 | This small network was trained for 1141999 steps. It appears to still be learning. 7 | The best values on the validation were found at step 1133999: 8 | 9 | SNRs 1133999 reconstruction_loss 1133999 10 | count 65536 65536 11 | mean 10.638316068 7.60381269454956 12 | std 7.0298398286 19.9564838409424 13 | min -27.67805389 0.011582373641431 14 | 25% 4.3873017658 0.263745993375778 15 | 50% 11.126480919 1.37784320116043 16 | 75% 16.480553727 5.75252687931061 17 | max 30.251670154 1465.634765625 18 | 19 | 20 | """ 21 | 22 | 23 | 24 | 25 | import tensorflow as tf 26 | 27 | from network.emptyTFGraph import EmptyTfGraph 28 | from utils.legacy.contextEncoder import ContextEncoderNetwork 29 | 30 | __author__ = 'Andres' 31 | 32 | tf.reset_default_graph() 33 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 34 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 35 | 36 | window_size = 5120 37 | gap_length = 1024 38 | batch_size = 256 39 | 40 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 41 | 42 | dataset = aModel.output() 43 | signal_length = window_size - gap_length 44 | first_half = dataset[:, :signal_length // 2] 45 | second_half = dataset[:, signal_length // 2:] 46 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 47 | 48 | with tf.name_scope('Energy_Spectogram'): 49 | fft_frame_length = 512 50 | fft_frame_step = 128 51 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step) 52 | real_stft = tf.real(stft) 53 | imag_stft = tf.imag(stft) 54 | real_stft_left = real_stft[:, 0, :, :] 55 | real_stft_right = real_stft[:, 1, :, :] 56 | 57 | imag_stft_left = imag_stft[:, 0, :, :] 58 | imag_stft_right = imag_stft[:, 1, :, :] 59 | 60 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 61 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 62 | print(real_stft) 63 | 64 | stacked = tf.stack([real_stft, imag_stft], axis=3) 65 | aModel.setOutputTo(stacked) 66 | 67 | with tf.variable_scope("Encoder"): 68 | filter_widths = [(9, 33), (5, 9), (3, 3)] 69 | input_channels = [2, 32, 64] 70 | output_channels = [32, 64, 128] 71 | strides = [[1, 2, 4, 1], [1, 2, 4, 1], [1, 2, 4, 1]] 72 | names = ['First_Conv', 'Second_Conv', 'Third_Conv'] 73 | 
aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 74 | output_channels=output_channels, strides=strides, names=names) 75 | 76 | aModel.addReshape((batch_size, 2560)) 77 | aModel.addFullyConnectedLayer(2560, 2048, 'Fully') 78 | aModel.addRelu() 79 | aModel.addReshape((batch_size, 1, 32, 64)) 80 | 81 | with tf.variable_scope("Decoder"): 82 | filter_widths = [(1, 17), (1, 65)] 83 | input_channels = [64, 64] 84 | output_channels = [64, 16] 85 | strides = [[1, 1, 4, 1]] * len(input_channels) 86 | names = ['First_Deconv', 'Second_Deconv'] 87 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 88 | output_channels=output_channels, strides=strides, names=names) 89 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(1, 129), input_channels=16, output_channels=1, 90 | stride=(1, 1, 2, 1), name="Last_Deconv") 91 | aModel.addReshape((batch_size, gap_length)) 92 | 93 | print(aModel.description()) 94 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 95 | gap_length=gap_length, learning_rate=1e-5, name='nat_full_stft_2') 96 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 97 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftGapTest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | from system.preAndPostProcessor import PreAndPostProcessor 6 | 7 | sys.path.insert(0, '../') 8 | import tensorflow as tf 9 | from tensorflow.contrib import slim 10 | import socket 11 | if 'omenx' in socket.gethostname(): 12 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 13 | 14 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 15 | 16 | __author__ = 'Andres' 17 | 18 | tf.reset_default_graph() 19 | if 'omenx' in socket.gethostname(): 20 | train_filename = '/store/nati/datasets/Nsynth/train_w5120_g1024_h512.tfrecords' 21 | valid_filename = '/store/nati/datasets/Nsynth/valid_w5120_g1024_h512.tfrecords' 22 | else: 23 | train_filename = '/scratch/snx3000/nperraud/data/NSynth/train_w5120_g1024_h512.tfrecords' 24 | valid_filename = '/scratch/snx3000/nperraud/data/NSynth/valid_w5120_g1024_h512.tfrecords' 25 | 26 | 27 | signal_length = 5120 28 | gap_length = 1024 29 | batch_size = 256 30 | 31 | fft_window_length = 512 32 | fft_hop_size = 128 33 | 34 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, signal_length), name="Target Model") 35 | anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=signal_length, 36 | gapLength=gap_length, 37 | fftWindowLength=fft_window_length, 38 | fftHopSize=fft_hop_size) 39 | anStftForTheInpaintingSetting.addStftForGapTo(aTargetModel) 40 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 41 | 42 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, signal_length), name="context encoder") 43 | anStftForTheInpaintingSetting.addStftForTheContextTo(aModel) 44 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 45 | aModel.addReshape((batch_size, 16, 257, 4)) 46 | 47 | with tf.variable_scope("Encoder"): 48 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 49 | input_channels = [4, 32, 64, 128, 128] 50 | output_channels = [32, 64, 128, 128, 200] 51 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 52 | names = ['First_Conv', 
'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 53 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 54 | output_channels=output_channels, strides=strides, names=names) 55 | 56 | aModel.addReshape((batch_size, 3200)) 57 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 58 | aModel.addRelu() 59 | aModel.addBatchNormalization() 60 | aModel.addReshape((batch_size, 8, 8, 32)) 61 | 62 | with tf.variable_scope("Decoder"): 63 | filter_shapes = [(5, 5), (3, 3)] 64 | input_channels = [32, 64] 65 | output_channels = [64, 257] 66 | strides = [[1, 2, 2, 1]] * len(input_channels) 67 | names = ['First_Deconv', 'Second_Deconv'] 68 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 69 | output_channels=output_channels, strides=strides, names=names) 70 | 71 | aModel.addReshape((batch_size, 8, 257, 128)) 72 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 73 | name='Third_deconv') 74 | aModel.addBatchNormalization() 75 | 76 | aModel.addReshape((batch_size, 11, 257, 32)) 77 | 78 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=2, 79 | stride=(1, 1, 1, 1), name="Last_Deconv") 80 | 81 | print(aModel.description()) 82 | 83 | model_vars = tf.trainable_variables() 84 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 85 | 86 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=signal_length, 87 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_baseline') 88 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 89 | -------------------------------------------------------------------------------- /system/dnnSystem.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import re 3 | 4 | __author__ = 'Andres' 5 | 6 | 7 | class DNNSystem(object): 8 | def __init__(self, architecture, name): 9 | self._architecture = architecture 10 | self._name = name 11 | 12 | def optimizer(self, learningRate): 13 | raise NotImplementedError("Subclass Responsibility") 14 | 15 | def _feedDict(self, data, sess, isTraining=True): 16 | raise NotImplementedError("Subclass Responsibility") 17 | 18 | def _evaluate(self, summariesDict, feed_dict, validReader, sess): 19 | raise NotImplementedError("Subclass Responsibility") 20 | 21 | def _loadReader(self, dataPath): 22 | raise NotImplementedError("Subclass Responsibility") 23 | 24 | def _evaluationSummaries(self): 25 | raise NotImplementedError("Subclass Responsibility") 26 | 27 | def train(self, trainTFRecordPath, validTFRecordPath, learningRate, numSteps=6e5, restoreNum=None): 28 | with tf.Session() as sess: 29 | trainReader = self._loadReader(trainTFRecordPath) 30 | validReader = self._loadReader(validTFRecordPath) 31 | optimizer = self.optimizer(learningRate) 32 | 33 | saver = tf.train.Saver(max_to_keep=100) 34 | path = self.modelsPath(restoreNum) 35 | _modelNum = get_trailing_number(path[:-5]) 36 | 37 | if _modelNum == 0: 38 | init = tf.global_variables_initializer() 39 | sess.run([init, tf.local_variables_initializer()]) 40 | print("Initialized") 41 | else: 42 | saver.restore(sess, path) 43 | sess.run([tf.local_variables_initializer()]) 44 | print("Model restored.") 45 | 46 | logs_path = 'utils/logdir/' + self._name # write each run to a diff folder. 
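# A worked note on the restore logic above (illustrative values, not part of
# the original training loop): modelsPath(n) below returns
# "utils/saved_models/<name>/model-<name><n>.ckpt", so path[:-5] strips the
# ".ckpt" suffix and get_trailing_number() recovers the step count, e.g.
#     get_trailing_number("model-run0")      -> 0      (fresh run: initialize)
#     get_trailing_number("model-run42000")  -> 42000  (restore and continue)
# A _modelNum of 0 triggers initialization; any other value restores the saved
# weights, and summaries and checkpoints are indexed at _modelNum + step so
# the step count stays monotonic across restarts.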
47 | print("logs path:", logs_path) 48 | writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph()) 49 | 50 | summariesDict = self._evaluationSummaries() 51 | 52 | try: 53 | trainReader.start() 54 | validReader.start() 55 | 56 | for step in range(1, int(numSteps)): 57 | try: 58 | data = trainReader.dataOperation(session=sess) 59 | except StopIteration: 60 | print("End of queue at step", step) 61 | break 62 | 63 | feed_dict = self._feedDict(data, sess, isTraining=True) 64 | sess.run(optimizer, feed_dict=feed_dict) 65 | 66 | if step % 40 == 0: 67 | train_summ = sess.run(self._architecture.lossSummaries(), feed_dict=feed_dict) 68 | writer.add_summary(train_summ, _modelNum + step) 69 | if step % 2000 == 0: 70 | summaries = self._evaluate(summariesDict, feed_dict, validReader, sess) 71 | for summary in summaries: 72 | writer.add_summary(summary, _modelNum+step) 73 | saver.save(sess, self.modelsPath(_modelNum + step)) 74 | except KeyboardInterrupt: 75 | pass 76 | 77 | saver.save(sess, self.modelsPath(_modelNum + step)) 78 | trainReader.finish() 79 | validReader.finish() 80 | print("Finalizing at step:", _modelNum + step) 81 | print("Last saved model:", self.modelsPath(_modelNum + step)) 82 | 83 | def modelsPath(self, models_number=None): 84 | pathdir = "utils/saved_models/" + self._name 85 | if models_number is None: 86 | ckpt = tf.train.get_checkpoint_state(pathdir) 87 | print(ckpt) 88 | if ckpt and ckpt.model_checkpoint_path: 89 | return ckpt.model_checkpoint_path 90 | else: 91 | models_number = 0 92 | models_path = pathdir + "/model-" + self._name 93 | models_ext = ".ckpt" 94 | return models_path + str(models_number) + models_ext 95 | 96 | 97 | def get_trailing_number(s): 98 | m = re.search(r'\d+$', s) 99 | return int(m.group()) if m else None 100 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftGapToMagTest.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import slim 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 6 | 7 | __author__ = 'Andres' 8 | 9 | tf.reset_default_graph() 10 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 11 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 12 | 13 | window_size = 5120 14 | gap_length = 1024 15 | batch_size = 256 16 | 17 | fft_frame_length = 512 18 | fft_frame_step = 128 19 | 20 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 21 | 22 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 23 | signal = aTargetModel.output() 24 | signal_without_unnecesary_sides = signal[:, 1664:3456] 25 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 26 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 27 | aTargetModel.addAbs() # (256, 11, 257) 28 | 29 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 30 | 31 | with tf.name_scope('Remove_gap_before_stft'): 32 | signal = aModel.output() 33 | left_side = signal[:, :2048] 34 | right_side = signal[:, 2048+1024:] 35 | 36 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 pading 384. 37 | # Unless signal in in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 
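# To make the numbers above concrete: the gap runs from (5120 - 1024) // 2 = 2048
# to 3072, so each context side is 2048 samples long. The zero padding of 384
# samples is fft_frame_length - fft_frame_step = 512 - 128, the overhang of an
# STFT frame beyond its hop; padding each side by this amount keeps the edge
# frames aligned with the frames of the gap. The same number explains the
# signal[:, 1664:3456] slice in the target model: 2048 - 384 = 1664 and
# 3072 + 384 = 3456, a 1792-sample window that yields (1792 - 512) // 128 + 1 = 11
# target frames of 512 // 2 + 1 = 257 bins, while each padded 2432-sample side
# yields (2432 - 512) // 128 + 1 = 16 frames.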
38 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 39 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 40 | 41 | # If you pad them with 0, maybe you also stack them allong axis 2 (one after the other.) 42 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 43 | aModel.setOutputTo(signal_without_gap) 44 | 45 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 46 | aModel.addReshape((batch_size, 32, 257)) 47 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 48 | aModel.addReshape((batch_size, 16, 257, 4)) 49 | 50 | with tf.variable_scope("Encoder"): 51 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 52 | input_channels = [4, 32, 64, 128, 128] 53 | output_channels = [32, 64, 128, 128, 200] 54 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 55 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 56 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 57 | output_channels=output_channels, strides=strides, names=names) 58 | 59 | aModel.addReshape((batch_size, 3200)) 60 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 61 | aModel.addRelu() 62 | aModel.addBatchNormalization() 63 | aModel.addReshape((batch_size, 8, 8, 32)) 64 | 65 | with tf.variable_scope("Decoder"): 66 | filter_shapes = [(5, 5), (3, 3)] 67 | input_channels = [32, 64] 68 | output_channels = [64, 257] 69 | strides = [[1, 2, 2, 1]] * len(input_channels) 70 | names = ['First_Deconv', 'Second_Deconv'] 71 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 72 | output_channels=output_channels, strides=strides, names=names) 73 | 74 | aModel.addReshape((batch_size, 8, 257, 128)) 75 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 76 | name='Third_deconv') 77 | aModel.addBatchNormalization() 78 | 79 | aModel.addReshape((batch_size, 11, 257, 32)) 80 | 81 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=1, 82 | stride=(1, 1, 1, 1), name="Last_Deconv") 83 | 84 | print(aModel.description()) 85 | 86 | model_vars = tf.trainable_variables() 87 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 88 | 89 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 90 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_mag_1_') 91 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 92 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftThird.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.signal.python.ops import window_ops 5 | 6 | from network.tfGraph import TFGraph 7 | from utils.legacy.contextEncoder import ContextEncoderNetwork 8 | 9 | __author__ = 'Andres' 10 | 11 | tf.reset_default_graph() 12 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 13 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 14 | 15 | window_size = 5120 16 | gap_length = 1024 17 | batch_size = 256 18 | 19 | aModel = TFGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 20 | 21 
| dataset = aModel.output() 22 | signal_length = window_size - gap_length 23 | first_half = dataset[:, :signal_length // 2] 24 | second_half = dataset[:, signal_length // 2:] 25 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 26 | 27 | with tf.name_scope('Energy_Spectogram'): 28 | fft_frame_length = 512 29 | fft_frame_step = 128 30 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 31 | 32 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step, 33 | window_fn=window_fn) 34 | real_stft = tf.real(stft) 35 | imag_stft = tf.imag(stft) 36 | real_stft_left = real_stft[:, 0, :, :] 37 | real_stft_right = real_stft[:, 1, :, :] 38 | 39 | imag_stft_left = imag_stft[:, 0, :, :] 40 | imag_stft_right = imag_stft[:, 1, :, :] 41 | 42 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 43 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 44 | print(real_stft) 45 | 46 | stacked = tf.stack([real_stft, imag_stft], axis=3) 47 | aModel.setOutputTo(stacked) 48 | 49 | with tf.variable_scope("Encoder"): 50 | filter_widths = [(7, 89), (4, 43), (2, 11), (2, 5), (2, 3)] 51 | input_channels = [2, 32, 128, 512, 128] 52 | output_channels = [32, 128, 512, 128, 64] 53 | strides = [[1, 1, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1]] 54 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 55 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 56 | output_channels=output_channels, strides=strides, names=names) 57 | print(aModel.output()) 58 | 59 | # aModel.addReshape((batch_size, 3072)) 60 | # aModel.addFullyConnectedLayer(3072, 2827, 'Fully') 61 | # aModel.addRelu() 62 | # aModel.addReshape((batch_size, 11, 257, 1)) 63 | 64 | with tf.variable_scope("Decoder"): 65 | filter_widths = [(1, 3), (2, 3), (3, 4)] 66 | input_channels = [64, 256, 512] 67 | output_channels = [256, 512, 257] 68 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1]] 69 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv'] 70 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 71 | output_channels=output_channels, strides=strides, names=names) 72 | aModel.addReshape((batch_size, 8, 257, 144)) 73 | 74 | aModel.addDeconvLayer(filter_shape=(2, 111), input_channels=144, output_channels=11, stride=(1, 2, 1, 1), 75 | name='first_deconv_after_reshape') 76 | aModel.addReshape((batch_size, 11, 257, 16)) 77 | 78 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(3, 11), input_channels=16, output_channels=2, 79 | stride=(1, 1, 1, 1), name="Last_Deconv") 80 | netOutput = aModel.output() 81 | complexOutput = tf.complex(netOutput[:, :, :, 0], netOutput[:, :, :, 1]) 82 | print(complexOutput) 83 | istft = tf.contrib.signal.inverse_stft(stfts=complexOutput, frame_length=fft_frame_length, frame_step=fft_frame_step, 84 | window_fn=tf.contrib.signal.inverse_stft_window_fn(fft_frame_step, 85 | forward_window_fn=window_fn)) 86 | padding = fft_frame_length-fft_frame_step 87 | unPaddedIstft = istft[:, padding:-padding] 88 | aModel.setOutputTo(unPaddedIstft) 89 | aModel.addReshape((batch_size, gap_length)) 90 | 91 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 92 | gap_length=gap_length, learning_rate=1e-4, name='nat_full_stft_3_') 93 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 94 | 
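# A minimal, self-contained sanity check of the trimming above (an editorial
# sketch in plain Python, not part of the original script): with frame_length
# 512 and frame_step 128, the inverse STFT of an 11-frame spectrogram spans
# (11 - 1) * 128 + 512 = 1792 samples, and stripping frame_length - frame_step
# = 384 samples from each side leaves exactly the 1024-sample gap. Note that
# tf.contrib.signal.inverse_stft_window_fn takes the frame step as its first
# positional argument, which is what this script passes.
frame_length, frame_step, num_frames, gap = 512, 128, 11, 1024
padding = frame_length - frame_step                           # 384
istft_length = (num_frames - 1) * frame_step + frame_length   # 1792
assert istft_length - 2 * padding == gap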
-------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftFifth.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.signal.python.ops import window_ops 5 | 6 | from network.emptyTFGraph import EmptyTfGraph 7 | from utils.legacy.contextEncoder import ContextEncoderNetwork 8 | 9 | __author__ = 'Andres' 10 | 11 | tf.reset_default_graph() 12 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 13 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 14 | 15 | window_size = 5120 16 | gap_length = 1024 17 | batch_size = 256 18 | 19 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size - gap_length), name="context encoder") 20 | 21 | dataset = aModel.output() 22 | signal_length = window_size - gap_length 23 | first_half = dataset[:, :signal_length // 2] 24 | second_half = dataset[:, signal_length // 2:] 25 | stacked_halfs = tf.stack([first_half, second_half], axis=1) 26 | 27 | with tf.name_scope('Energy_Spectogram'): 28 | fft_frame_length = 512 29 | fft_frame_step = 128 30 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 31 | 32 | stft = tf.contrib.signal.stft(signals=stacked_halfs, frame_length=fft_frame_length, frame_step=fft_frame_step, 33 | window_fn=window_fn) 34 | real_stft = tf.real(stft) 35 | imag_stft = tf.imag(stft) 36 | real_stft_left = real_stft[:, 0, :, :] 37 | real_stft_right = real_stft[:, 1, :, :] 38 | 39 | imag_stft_left = imag_stft[:, 0, :, :] 40 | imag_stft_right = imag_stft[:, 1, :, :] 41 | 42 | real_stft = tf.concat([real_stft_left, real_stft_right], 1) 43 | imag_stft = tf.concat([imag_stft_left, imag_stft_right], 1) 44 | print(real_stft) 45 | 46 | stacked = tf.stack([real_stft, imag_stft], axis=3) 47 | aModel.setOutputTo(stacked) 48 | 49 | with tf.variable_scope("Encoder"): 50 | filter_widths = [(7, 89), (4, 43), (2, 11), (2, 3), (2, 5)] 51 | input_channels = [2, 16, 32, 128, 128] 52 | output_channels = [16, 32, 128, 128, 64] 53 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 1, 1]] 54 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 55 | aModel.addSeveralConvLayers(filter_shapes=filter_widths, input_channels=input_channels, 56 | output_channels=output_channels, strides=strides, names=names) 57 | print(aModel.output()) 58 | 59 | aModel.addReshape((batch_size, 2176)) 60 | aModel.addFullyConnectedLayer(2176, 1152, 'Fully') 61 | aModel.addRelu() 62 | aModel.addReshape((batch_size, 2, 9, 64)) 63 | 64 | with tf.variable_scope("Decoder"): 65 | filter_widths = [(1, 3), (2, 3), (2, 3)] 66 | input_channels = [64, 256, 64] 67 | output_channels = [256, 64, 257] 68 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1]] 69 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv'] 70 | aModel.addSeveralDeconvLayers(filter_shapes=filter_widths, input_channels=input_channels, 71 | output_channels=output_channels, strides=strides, names=names) 72 | aModel.addReshape((batch_size, 8, 257, 144)) 73 | 74 | aModel.addDeconvLayer(filter_shape=(2, 31), input_channels=144, output_channels=11, stride=(1, 2, 1, 1), 75 | name='first_deconv_after_reshape') 76 | aModel.addReshape((batch_size, 11, 257, 16)) 77 | 78 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(3, 129), input_channels=16, output_channels=2, 79 | stride=(1, 1, 1, 1), name="Last_Deconv") 80 | netOutput = aModel.output() 81 | complexOutput = 
tf.complex(netOutput[:, :, :, 0], netOutput[:, :, :, 1]) 82 | print(complexOutput) 83 | istft = tf.contrib.signal.inverse_stft(stfts=complexOutput, frame_length=fft_frame_length, frame_step=fft_frame_step, 84 | window_fn=tf.contrib.signal.inverse_stft_window_fn(fft_frame_step, 85 | forward_window_fn=window_fn)) 86 | padding = fft_frame_length-fft_frame_step 87 | unPaddedIstft = istft[:, padding:-padding] 88 | aModel.setOutputTo(unPaddedIstft) 89 | aModel.addReshape((batch_size, gap_length)) 90 | 91 | print(aModel.description()) 92 | aContextEncoderNetwork = ContextEncoderNetwork(model=aModel, batch_size=batch_size, window_size=window_size, 93 | gap_length=gap_length, learning_rate=1e-4, name='nat_full_stft_5_') 94 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 95 | -------------------------------------------------------------------------------- /system/preAndPostProcessor.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.signal.python.ops import window_ops 5 | 6 | __author__ = 'Andres' 7 | 8 | 9 | class PreAndPostProcessor(object): 10 | def __init__(self, signalLength, gapLength, fftWindowLength, fftHopSize): 11 | super(PreAndPostProcessor, self).__init__() 12 | self._signalLength = signalLength 13 | self._gapLength = gapLength 14 | self._fftWindowLength = fftWindowLength 15 | self._fftHopSize = fftHopSize 16 | 17 | def signalLength(self): 18 | return self._signalLength 19 | 20 | def gapLength(self): 21 | return self._gapLength 22 | 23 | def fftWindowLenght(self): 24 | return self._fftWindowLength 25 | 26 | def fftHopSize(self): 27 | return self._fftHopSize 28 | 29 | def padding(self): 30 | return self._fftWindowLength - self._fftHopSize 31 | 32 | def stftForGapOf(self, aBatchOfSignals): 33 | assert len(aBatchOfSignals.shape) == 2 34 | signalWithoutExtraSides = self._removeExtraSidesForSTFTOfGap(aBatchOfSignals) 35 | return self._realAndImagSTFT(signalWithoutExtraSides) 36 | 37 | def stftForTheContextOf(self, aBatchOfSignals): 38 | assert len(aBatchOfSignals.shape) == 2 39 | leftAndRightSideStacked = self._removeGap(aBatchOfSignals) 40 | leftAndRightSideStackedAndPadded = self._addPaddingForStftOfContext(leftAndRightSideStacked) 41 | 42 | realAndImagSTFTOfLeftSide = self._realAndImagSTFT(leftAndRightSideStackedAndPadded[:, 0]) 43 | realAndImagSTFTOfRightSide = self._realAndImagSTFT(leftAndRightSideStackedAndPadded[:, 1]) 44 | 45 | contextRealAndImagSTFT = tf.concat([realAndImagSTFTOfLeftSide, realAndImagSTFTOfRightSide], axis=-1) 46 | return contextRealAndImagSTFT 47 | 48 | def _realAndImagSTFT(self, aBatchOfSignals): 49 | stft = tf.contrib.signal.stft(signals=aBatchOfSignals, 50 | frame_length=self._fftWindowLength, frame_step=self._fftHopSize) 51 | return self._divideComplexIntoRealAndImag(stft) 52 | 53 | def inverseStftOfGap(self, batchOfStftOfGap): 54 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 55 | inverse_window = tf.contrib.signal.inverse_stft_window_fn(self._fftWindowLength, forward_window_fn=window_fn) 56 | padded_gaps = tf.contrib.signal.inverse_stft(stfts=batchOfStftOfGap, frame_length=self._fftWindowLength, 57 | frame_step=self._fftHopSize, window_fn=inverse_window) 58 | return padded_gaps[:, self.padding():-self.padding()] 59 | 60 | def inverseStftOfSignal(self, batchOfStftsOfSignal): 61 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 62 | inverse_window = 
tf.contrib.signal.inverse_stft_window_fn(self._fftWindowLength, forward_window_fn=window_fn) 63 | return tf.contrib.signal.inverse_stft(stfts=batchOfStftsOfSignal, frame_length=self._fftWindowLength, 64 | frame_step=self._fftHopSize, window_fn=inverse_window) 65 | 66 | def _gapBeginning(self): 67 | return (self._signalLength - self._gapLength) // 2 68 | 69 | def _gapEnding(self): 70 | return self._gapBeginning() + self._gapLength 71 | 72 | def _removeExtraSidesForSTFTOfGap(self, batchOfSignals): 73 | return batchOfSignals[:, self._gapBeginning() - self.padding(): self._gapEnding() + self.padding()] 74 | 75 | def _removeGap(self, batchOfSignals): 76 | leftSide = batchOfSignals[:, :self._gapBeginning()] 77 | rightSide = batchOfSignals[:, self._gapEnding():] 78 | return tf.stack((leftSide, rightSide), axis=1) 79 | 80 | def _addPaddingForStftOfContext(self, batchOfSides): 81 | """batchOfSides should contain the left side on the first dimension and the right side on the second""" 82 | batchSize = batchOfSides.shape.as_list()[0] 83 | leftSidePadded = tf.concat((batchOfSides[:, 0], tf.zeros((batchSize, self.padding()))), axis=1) 84 | rightSidePadded = tf.concat((tf.zeros((batchSize, self.padding())), batchOfSides[:, 1]), axis=1) 85 | return tf.stack((leftSidePadded, rightSidePadded), axis=1) 86 | 87 | def _divideComplexIntoRealAndImag(self, complexTensor): 88 | real_part = tf.real(complexTensor) 89 | imag_part = tf.imag(complexTensor) 90 | return tf.stack([real_part, imag_part], axis=-1, name='divideComplexIntoRealAndImag') 91 | -------------------------------------------------------------------------------- /system/magPreAndPostProcessor.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.signal.python.ops import window_ops 5 | 6 | __author__ = 'Andres' 7 | 8 | 9 | class MagPreAndPostProcessor(object): 10 | def __init__(self, signalLength, gapLength, fftWindowLength, fftHopSize): 11 | super(MagPreAndPostProcessor, self).__init__() 12 | self._signalLength = signalLength 13 | self._gapLength = gapLength 14 | self._fftWindowLength = fftWindowLength 15 | self._fftHopSize = fftHopSize 16 | 17 | def signalLength(self): 18 | return self._signalLength 19 | 20 | def gapLength(self): 21 | return self._gapLength 22 | 23 | def fftWindowLenght(self): 24 | return self._fftWindowLength 25 | 26 | def fftHopSize(self): 27 | return self._fftHopSize 28 | 29 | def padding(self): 30 | return self._fftWindowLength - self._fftHopSize 31 | 32 | def stftForGapOf(self, aBatchOfSignals): 33 | assert len(aBatchOfSignals.shape) == 2 34 | signalWithoutExtraSides = self._removeExtraSidesForSTFTOfGap(aBatchOfSignals) 35 | stft = tf.contrib.signal.stft(signals=signalWithoutExtraSides, 36 | frame_length=self._fftWindowLength, frame_step=self._fftHopSize) 37 | return tf.expand_dims(tf.abs(stft), axis=-1) 38 | 39 | def stftForTheContextOf(self, aBatchOfSignals): 40 | assert len(aBatchOfSignals.shape) == 2 41 | leftAndRightSideStacked = self._removeGap(aBatchOfSignals) 42 | leftAndRightSideStackedAndPadded = self._addPaddingForStftOfContext(leftAndRightSideStacked) 43 | 44 | realAndImagSTFTOfLeftSide = self._realAndImagSTFT(leftAndRightSideStackedAndPadded[:, 0]) 45 | realAndImagSTFTOfRightSide = self._realAndImagSTFT(leftAndRightSideStackedAndPadded[:, 1]) 46 | 47 | contextRealAndImagSTFT = tf.concat([realAndImagSTFTOfLeftSide, realAndImagSTFTOfRightSide], axis=-1) 48 | return contextRealAndImagSTFT 49 | 50 | def 
_realAndImagSTFT(self, aBatchOfSignals): 51 | stft = tf.contrib.signal.stft(signals=aBatchOfSignals, 52 | frame_length=self._fftWindowLength, frame_step=self._fftHopSize) 53 | return self._divideComplexIntoRealAndImag(stft) 54 | 55 | def inverseStftOfGap(self, batchOfStftOfGap): 56 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 57 | inverse_window = tf.contrib.signal.inverse_stft_window_fn(self._fftWindowLength, forward_window_fn=window_fn) 58 | padded_gaps = tf.contrib.signal.inverse_stft(stfts=batchOfStftOfGap, frame_length=self._fftWindowLength, 59 | frame_step=self._fftHopSize, window_fn=inverse_window) 60 | return padded_gaps[:, self.padding():-self.padding()] 61 | 62 | def inverseStftOfSignal(self, batchOfStftsOfSignal): 63 | window_fn = functools.partial(window_ops.hann_window, periodic=True) 64 | inverse_window = tf.contrib.signal.inverse_stft_window_fn(self._fftWindowLength, forward_window_fn=window_fn) 65 | return tf.contrib.signal.inverse_stft(stfts=batchOfStftsOfSignal, frame_length=self._fftWindowLength, 66 | frame_step=self._fftHopSize, window_fn=inverse_window) 67 | 68 | def _gapBeginning(self): 69 | return (self._signalLength - self._gapLength) // 2 70 | 71 | def _gapEnding(self): 72 | return self._gapBeginning() + self._gapLength 73 | 74 | def _removeExtraSidesForSTFTOfGap(self, batchOfSignals): 75 | return batchOfSignals[:, self._gapBeginning() - self.padding(): self._gapEnding() + self.padding()] 76 | 77 | def _removeGap(self, batchOfSignals): 78 | leftSide = batchOfSignals[:, :self._gapBeginning()] 79 | rightSide = batchOfSignals[:, self._gapEnding():] 80 | return tf.stack((leftSide, rightSide), axis=1) 81 | 82 | def _addPaddingForStftOfContext(self, batchOfSides): 83 | """batchOfSides should contain the left side on the first dimension and the right side on the second""" 84 | batchSize = batchOfSides.shape.as_list()[0] 85 | leftSidePadded = tf.concat((batchOfSides[:, 0], tf.zeros((batchSize, self.padding()))), axis=1) 86 | rightSidePadded = tf.concat((tf.zeros((batchSize, self.padding())), batchOfSides[:, 1]), axis=1) 87 | return tf.stack((leftSidePadded, rightSidePadded), axis=1) 88 | 89 | def _divideComplexIntoRealAndImag(self, complexTensor): 90 | real_part = tf.real(complexTensor) 91 | imag_part = tf.imag(complexTensor) 92 | return tf.stack([real_part, imag_part], axis=-1, name='divideComplexIntoRealAndImag') 93 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftGapOneOneTest.py: -------------------------------------------------------------------------------- 1 | """ 2 | This trained for 85k steps (24hs) with a learning rate of 1e-3 and didn't learn anything. 
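For scale, 85k steps at a batch size of 256 comes to roughly 21.8 million training windows.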
3 | 4 | """ 5 | 6 | import tensorflow as tf 7 | from tensorflow.contrib import slim 8 | 9 | from network.emptyTFGraph import EmptyTfGraph 10 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 11 | 12 | __author__ = 'Andres' 13 | 14 | tf.reset_default_graph() 15 | train_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 16 | valid_filename = '../test_w5120_g1024_h512_ex63501.tfrecords' 17 | 18 | window_size = 5120 19 | gap_length = 1024 20 | batch_size = 256 21 | 22 | fft_frame_length = 512 23 | fft_frame_step = 128 24 | 25 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 26 | 27 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 28 | signal = aTargetModel.output() 29 | signal_without_unnecesary_sides = signal[:, 1664:3456] 30 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 31 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 32 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 33 | 34 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 35 | 36 | with tf.name_scope('Remove_gap_before_stft'): 37 | signal = aModel.output() 38 | left_side = signal[:, :2048] 39 | right_side = signal[:, 2048+1024:] 40 | 41 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 pading 384. 42 | # Unless signal in in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 43 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 44 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 45 | 46 | # If you pad them with 0, maybe you also stack them allong axis 2 (one after the other.) 47 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 48 | aModel.setOutputTo(signal_without_gap) 49 | 50 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 51 | aModel.addReshape((batch_size, 32, 257)) 52 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 53 | aModel.addReshape((batch_size, 16, 257, 4)) 54 | 55 | with tf.variable_scope("Encoder"): 56 | filter_shapes = [(7, 89), (3, 17), (2, 9), (1, 5), (2, 5), (2, 5)] 57 | input_channels = [4, 32, 128, 512, 256, 128] 58 | output_channels = [32, 128, 512, 256, 128, 256] 59 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 2, 1], [1, 1, 2, 1], [1, 1, 2, 1]] 60 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv', 'Sixth_Conv'] 61 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 62 | output_channels=output_channels, strides=strides, names=names) 63 | 64 | aModel.addReshape((batch_size, 2560)) 65 | aModel.addFullyConnectedLayer(2560, 2048, 'Fully') 66 | aModel.addRelu() 67 | aModel.addBatchNormalization() 68 | aModel.addReshape((batch_size, 8, 8, 32)) 69 | 70 | with tf.variable_scope("Decoder"): 71 | filter_shapes = [(5, 5), (3, 3), (3, 3), (11, 11)] 72 | input_channels = [32, 128, 512, 128] 73 | output_channels = [128, 512, 128, 32] 74 | strides = [[1, 2, 2, 1]] * len(input_channels) 75 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv', 'Fourth_Deconv'] 76 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 77 | output_channels=output_channels, strides=strides, names=names) 78 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(13, 13), input_channels=32, 
output_channels=2, 79 | stride=(1, 1, 1, 1), name="Last_Deconv") 80 | 81 | aModel.addReshape((batch_size, 128, 2, 128)) 82 | aModel.addConvLayer(filter_shape=(1, 1), input_channels=128, output_channels=11, stride=(1, 1, 1, 1), 83 | name='first_1by1') 84 | aModel.addReshape((batch_size, 11, 2, 128)) 85 | aModel.addConvLayer(filter_shape=(1, 1), input_channels=128, output_channels=257, stride=(1, 1, 1, 1), 86 | name='second_1by1') 87 | aModel.addReshape((batch_size, 11, 257, 2)) 88 | 89 | print(aModel.description()) 90 | 91 | model_vars = tf.trainable_variables() 92 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 93 | 94 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 95 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_1to1_1_') 96 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 97 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatStftGapBIGTest.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | 5 | from network.emptyTFGraph import EmptyTfGraph 6 | 7 | sys.path.insert(0, '../') 8 | import tensorflow as tf 9 | from tensorflow.contrib import slim 10 | import socket 11 | if 'omenx' in socket.gethostname(): 12 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 13 | 14 | 15 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 16 | 17 | __author__ = 'Andres' 18 | 19 | tf.reset_default_graph() 20 | train_filename = '/scratch/fma_small_train_w5120_g1024_h512.tfrecords' 21 | valid_filename = '/scratch/fma_small_valid_w5120_g1024_h512.tfrecords' 22 | 23 | window_size = 5120 24 | gap_length = 1024 25 | batch_size = 256 26 | 27 | fft_frame_length = 512 28 | fft_frame_step = 128 29 | 30 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 31 | 32 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 33 | signal = aTargetModel.output() 34 | signal_without_unnecesary_sides = signal[:, 1664:3456] 35 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 36 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 37 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 38 | 39 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 40 | 41 | with tf.name_scope('Remove_gap_before_stft'): 42 | signal = aModel.output() 43 | left_side = signal[:, :2048] 44 | right_side = signal[:, 2048+1024:] 45 | 46 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 padding 384. 47 | # Unless signal is in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 48 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 49 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 50 | 51 | # If you pad them with 0, maybe you also stack them along axis 2 (one after the other.) 
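# Shape bookkeeping for the steps below, spelled out from the inline shape
# comments: stacking the two padded 2432-sample sides gives (256, 2, 2432);
# the STFT maps each side to 16 frames of 257 bins, (256, 2, 16, 257); the
# reshape to (256, 32, 257) lays the two sides' frames end to end; splitting
# real and imaginary parts appends a channel axis, (256, 32, 257, 2); and the
# final reshape regroups that buffer as (256, 16, 257, 4) so the encoder
# receives a 4-channel input.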
52 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 53 | aModel.setOutputTo(signal_without_gap) 54 | 55 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 56 | aModel.addReshape((batch_size, 32, 257)) 57 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 58 | aModel.addReshape((batch_size, 16, 257, 4)) 59 | 60 | with tf.variable_scope("Encoder"): 61 | filter_shapes = [(7, 89), (3, 17), (2, 11), (1, 9), (1, 5), (2, 5)] 62 | input_channels = [4, 32, 128, 512, 256, 160] 63 | output_channels = [32, 128, 512, 256, 160, 128] 64 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1], [1, 1, 1, 1]] 65 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv', 'Sixth_Conv'] 66 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 67 | output_channels=output_channels, strides=strides, names=names) 68 | 69 | aModel.addReshape((batch_size, 2048)) 70 | aModel.addFullyConnectedLayer(2048, 2048, 'Fully') 71 | aModel.addRelu() 72 | aModel.addBatchNormalization() 73 | aModel.addReshape((batch_size, 8, 8, 32)) 74 | 75 | with tf.variable_scope("Decoder"): 76 | filter_shapes = [(8, 8), (5, 5), (3, 3)] 77 | input_channels = [32, 128, 512] 78 | output_channels = [128, 512, 257] 79 | strides = [[1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 1, 1]] 80 | names = ['First_Deconv', 'Second_Deconv', 'Third_Deconv'] 81 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 82 | output_channels=output_channels, strides=strides, names=names) 83 | 84 | aModel.addReshape((batch_size, 8, 257, 128)) 85 | aModel.addDeconvLayer(filter_shape=(5, 67), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 86 | name='Fourth_deconv') 87 | aModel.addBatchNormalization() 88 | 89 | aModel.addReshape((batch_size, 11, 257, 32)) 90 | 91 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(11, 257), input_channels=32, output_channels=2, 92 | stride=(1, 1, 1, 1), name="Last_Deconv") 93 | 94 | print(aModel.description()) 95 | 96 | model_vars = tf.trainable_variables() 97 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 98 | 99 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 100 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_big_1_') 101 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 102 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | 6 | sys.path.insert(0, '../') 7 | import tensorflow as tf 8 | from tensorflow.contrib import slim 9 | import socket 10 | if 'omenx' in socket.gethostname(): 11 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 12 | 13 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 14 | 15 | __author__ = 'Andres' 16 | 17 | tf.reset_default_graph() 18 | if 'omenx' in socket.gethostname(): 19 | train_filename = '/store/nati/datasets/Nsynth/train_w5120_g1024_h512.tfrecords' 20 | valid_filename = '/store/nati/datasets/Nsynth/valid_w5120_g1024_h512.tfrecords' 21 | else: 22 | train_filename = '/scratch/snx3000/nperraud/data/NSynth/train_w5120_g1024_h512.tfrecords' 23 | valid_filename = 
'/scratch/snx3000/nperraud/data/NSynth/valid_w5120_g1024_h512.tfrecords' 24 | 25 | window_size = 5120 26 | gap_length = 1024 27 | batch_size = 256 28 | 29 | fft_frame_length = 512 30 | fft_frame_step = 128 31 | 32 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 33 | 34 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 35 | signal = aTargetModel.output() 36 | signal_without_unnecesary_sides = signal[:, 1664:3456] 37 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 38 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 39 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 40 | 41 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 42 | 43 | with tf.name_scope('Remove_gap_before_stft'): 44 | signal = aModel.output() 45 | left_side = signal[:, :2048] 46 | right_side = signal[:, 2048+1024:] 47 | 48 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 padding 384. 49 | # Unless signal is in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 50 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 51 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 52 | 53 | # If you pad them with 0, maybe you also stack them along axis 2 (one after the other.) 54 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 55 | aModel.setOutputTo(signal_without_gap) 56 | 57 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 58 | aModel.addReshape((batch_size, 32, 257)) 59 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 60 | aModel.addReshape((batch_size, 16, 257, 4)) 61 | 62 | with tf.variable_scope("Encoder"): 63 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 64 | input_channels = [4, 32, 64, 128, 128] 65 | output_channels = [32, 64, 128, 128, 200] 66 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 67 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 68 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 69 | output_channels=output_channels, strides=strides, names=names) 70 | 71 | aModel.addReshape((batch_size, 3200)) 72 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 73 | aModel.addRelu() 74 | aModel.addBatchNormalization() 75 | aModel.addReshape((batch_size, 8, 8, 32)) 76 | 77 | with tf.variable_scope("Decoder"): 78 | filter_shapes = [(5, 5), (3, 3)] 79 | input_channels = [32, 64] 80 | output_channels = [64, 257] 81 | strides = [[1, 2, 2, 1]] * len(input_channels) 82 | names = ['First_Deconv', 'Second_Deconv'] 83 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 84 | output_channels=output_channels, strides=strides, names=names) 85 | 86 | aModel.addReshape((batch_size, 8, 257, 128)) 87 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 88 | name='Third_deconv') 89 | aModel.addBatchNormalization() 90 | 91 | aModel.addReshape((batch_size, 11, 257, 32)) 92 | 93 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=2, 94 | stride=(1, 1, 1, 1), name="Last_Deconv") 95 | 96 | print(aModel.description()) 97 | 98 | model_vars = tf.trainable_variables() 99 | 
slim.model_analyzer.analyze_vars(model_vars, print_info=True) 100 | 101 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 102 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_1_') 103 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6, restore_num=None) 104 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatSkip.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | 6 | sys.path.insert(0, '../') 7 | import tensorflow as tf 8 | from tensorflow.contrib import slim 9 | import socket 10 | if 'omenx' in socket.gethostname(): 11 | os.environ["CUDA_VISIBLE_DEVICES"]="" 12 | 13 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 14 | 15 | __author__ = 'Andres' 16 | 17 | tf.reset_default_graph() 18 | if 'omenx' in socket.gethostname(): 19 | train_filename = '/store/nati/datasets/Nsynth/train_w5120_g1024_h512.tfrecords' 20 | valid_filename = '/store/nati/datasets/Nsynth/valid_w5120_g1024_h512.tfrecords' 21 | else: 22 | train_filename = '/scratch/snx3000/nperraud/data/NSynth/train_w5120_g1024_h512.tfrecords' 23 | valid_filename = '/scratch/snx3000/nperraud/data/NSynth/valid_w5120_g1024_h512.tfrecords' 24 | 25 | window_size = 5120 26 | gap_length = 1024 27 | batch_size = 256 28 | 29 | fft_frame_length = 512 30 | fft_frame_step = 128 31 | 32 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 33 | 34 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 35 | signal = aTargetModel.output() 36 | signal_without_unnecesary_sides = signal[:, 1664:3456] 37 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 38 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 39 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 40 | 41 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 42 | 43 | with tf.name_scope('Remove_gap_before_stft'): 44 | signal = aModel.output() 45 | left_side = signal[:, :2048] 46 | right_side = signal[:, 2048+1024:] 47 | 48 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 padding 384. 49 | # Unless signal is in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 50 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 51 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 52 | 53 | # If you pad them with 0, maybe you also stack them along axis 2 (one after the other.) 
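# A hypothetical variant of what the note above suggests (not used by this
# script): laying the padded sides end to end along time instead of stacking
# them as separate channels,
#     sides_in_time = tf.concat((left_side_padded, right_side_padded), axis=1)  # (256, 4864)
# would yield a single STFT of (4864 - 512) // 128 + 1 = 35 frames per example,
# rather than the (256, 2, 16, 257) pair computed below.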
54 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 55 | aModel.setOutputTo(signal_without_gap) 56 | 57 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 58 | aModel.addReshape((batch_size, 32, 257)) 59 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 60 | aModel.addReshape((batch_size, 16, 257, 4)) 61 | 62 | with tf.variable_scope("Encoder"): 63 | filter_shapes = [(7, 89), (3, 17), (2, 6), (1, 5), (1, 3)] 64 | input_channels = [4, 32, 64, 128, 128] 65 | output_channels = [32, 64, 128, 128, 200] 66 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 2, 3, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 67 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Fourth_Conv', 'Fifth_Conv'] 68 | aModel.addSeveralConvLayersWithSkip(filter_shapes=filter_shapes, input_channels=input_channels, 69 | output_channels=output_channels, strides=strides, names=names) 70 | 71 | aModel.addReshape((batch_size, 3200)) 72 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 73 | aModel.addRelu() 74 | aModel.addBatchNormalization() 75 | aModel.addReshape((batch_size, 8, 8, 32)) 76 | 77 | with tf.variable_scope("Decoder"): 78 | filter_shapes = [(5, 5), (3, 3)] 79 | input_channels = [32, 64] 80 | output_channels = [64, 257] 81 | strides = [[1, 2, 2, 1]] * len(input_channels) 82 | names = ['First_Deconv', 'Second_Deconv'] 83 | aModel.addSeveralDeconvLayersWithSkip(filter_shapes=filter_shapes, input_channels=input_channels, 84 | output_channels=output_channels, strides=strides, names=names) 85 | 86 | aModel.addReshape((batch_size, 8, 257, 128)) 87 | aModel.addDeconvLayerWithSkip(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 88 | name='Third_deconv') 89 | aModel.addBatchNormalization() 90 | 91 | aModel.addReshape((batch_size, 11, 257, 32)) 92 | 93 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=2, 94 | stride=(1, 1, 1, 1), name="Last_Deconv") 95 | 96 | print(aModel.description()) 97 | 98 | model_vars = tf.trainable_variables() 99 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 100 | 101 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 102 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_1_skip') 103 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 104 | -------------------------------------------------------------------------------- /utils/legacy/simulations/runNatBig.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from network.emptyTFGraph import EmptyTfGraph 5 | 6 | sys.path.insert(0, '../') 7 | import tensorflow as tf 8 | from tensorflow.contrib import slim 9 | import socket 10 | if 'omenx' in socket.gethostname(): 11 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 12 | 13 | from utils.legacy.stftGapContextEncoder import StftGapContextEncoder 14 | 15 | __author__ = 'Andres' 16 | 17 | tf.reset_default_graph() 18 | if 'omenx' in socket.gethostname(): 19 | train_filename = '/store/nati/datasets/Nsynth/train_w5120_g1024_h512.tfrecords' 20 | valid_filename = '/store/nati/datasets/Nsynth/valid_w5120_g1024_h512.tfrecords' 21 | else: 22 | train_filename = '/scratch/snx3000/nperraud/data/NSynth/train_w5120_g1024_h512.tfrecords' 23 | valid_filename = '/scratch/snx3000/nperraud/data/NSynth/valid_w5120_g1024_h512.tfrecords' 24 | 25 | 
window_size = 5120 26 | gap_length = 1024 27 | batch_size = 256 28 | 29 | fft_frame_length = 512 30 | fft_frame_step = 128 31 | 32 | aTargetModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="Target Model") 33 | 34 | with tf.name_scope('Remove_unnecesary_sides_before_stft'): 35 | signal = aTargetModel.output() 36 | signal_without_unnecesary_sides = signal[:, 1664:3456] 37 | aTargetModel.setOutputTo(signal_without_unnecesary_sides) 38 | aTargetModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) 39 | aTargetModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 11, 257, 2) 40 | 41 | aModel = EmptyTfGraph(shapeOfInput=(batch_size, window_size), name="context encoder") 42 | 43 | with tf.name_scope('Remove_gap_before_stft'): 44 | signal = aModel.output() 45 | left_side = signal[:, :2048] 46 | right_side = signal[:, 2048+1024:] 47 | 48 | # This is strange. The window is 5K samples long, the hole 1024 and the 0 padding 384. 49 | # Unless signal is in spectrogram. In that case, the code is not very clear. Maybe consider adding comments. 50 | left_side_padded = tf.concat((left_side, tf.zeros((batch_size, 384))), axis=1) 51 | right_side_padded = tf.concat((tf.zeros((batch_size, 384)), right_side), axis=1) 52 | 53 | # If you pad them with 0, maybe you also stack them along axis 2 (one after the other.) 54 | signal_without_gap = tf.stack((left_side_padded, right_side_padded), axis=1) # (256, 2, 2432) 55 | aModel.setOutputTo(signal_without_gap) 56 | 57 | aModel.addSTFT(frame_length=fft_frame_length, frame_step=fft_frame_step) # (256, 2, 16, 257) 58 | aModel.addReshape((batch_size, 32, 257)) 59 | aModel.divideComplexOutputIntoRealAndImaginaryParts() # (256, 32, 257, 2) 60 | aModel.addReshape((batch_size, 16, 257, 4)) 61 | 62 | with tf.variable_scope("Encoder"): 63 | filter_shapes = [(7, 89), (3, 17), (2, 6), (2, 6), (2, 5), (1, 5), (1, 3)] 64 | input_channels = [4, 32, 64, 128, 128, 128, 128] 65 | output_channels = [32, 64, 128, 128, 128, 128, 200] 66 | strides = [[1, 2, 2, 1], [1, 2, 3, 1], [1, 1, 1, 1], [1, 2, 3, 1], [1, 1, 1, 1], [1, 1, 2, 1], [1, 1, 1, 1]] 67 | names = ['First_Conv', 'Second_Conv', 'Third_Conv', 'Third_Conv_b', 'Fourth_Conv', 'Fourth_Conv_b', 'Fifth_Conv'] 68 | aModel.addSeveralConvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 69 | output_channels=output_channels, strides=strides, names=names) 70 | 71 | aModel.addReshape((batch_size, 3200)) 72 | aModel.addFullyConnectedLayer(3200, 2048, 'Fully') 73 | aModel.addRelu() 74 | aModel.addBatchNormalization() 75 | aModel.addReshape((batch_size, 8, 8, 32)) 76 | 77 | with tf.variable_scope("Decoder"): 78 | filter_shapes = [(5, 5), (5, 5), (5, 5)] 79 | input_channels = [32, 64, 64] 80 | output_channels = [64, 64, 257] 81 | strides = [[1, 2, 2, 1], [1, 1, 1, 1], [1, 2, 2, 1]] 82 | names = ['First_Deconv', 'First_Deconv_b', 'Second_Deconv'] 83 | aModel.addSeveralDeconvLayers(filter_shapes=filter_shapes, input_channels=input_channels, 84 | output_channels=output_channels, strides=strides, names=names) 85 | 86 | aModel.addReshape((batch_size, 8, 257, 128)) 87 | aModel.addDeconvLayer(filter_shape=(3, 33), input_channels=128, output_channels=11, stride=(1, 2, 2, 1), 88 | name='Third_deconv') 89 | aModel.addBatchNormalization() 90 | 91 | aModel.addReshape((batch_size, 11, 257, 32)) 92 | 93 | aModel.addDeconvLayerWithoutNonLin(filter_shape=(5, 89), input_channels=32, output_channels=2, 94 | stride=(1, 1, 1, 1), name="Last_Deconv") 95 | 96 | print(aModel.description()) 97 | 98 | model_vars = 
tf.trainable_variables() 99 | slim.model_analyzer.analyze_vars(model_vars, print_info=True) 100 | 101 | aContextEncoderNetwork = StftGapContextEncoder(model=aModel, batch_size=batch_size, target_model=aTargetModel, window_size=window_size, 102 | gap_length=gap_length, learning_rate=1e-3, name='nat_stft_gap_1_big') 103 | aContextEncoderNetwork.train(train_filename, valid_filename, num_steps=1e6) 104 | -------------------------------------------------------------------------------- /architecture/channelWiseContextEncoderArchitecture.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from architecture.architecture import Architecture 4 | from network.tfGraph import TFGraph 5 | 6 | __author__ = 'Andres' 7 | 8 | 9 | class ChannelWiseContextEncoderArchitecture(Architecture): 10 | def __init__(self, inputShape, encoderParams, decoderParams, fullyParams): 11 | with tf.variable_scope("ContextEncoderArchitecture"): 12 | self._inputShape = inputShape 13 | self._encoderParams = encoderParams 14 | self._decoderParams = decoderParams 15 | self._fullyParams = fullyParams 16 | super().__init__() 17 | 18 | def inputShape(self): 19 | return self._inputShape 20 | 21 | def _lossGraph(self): 22 | with tf.variable_scope("Loss"): 23 | targetSquaredNorm = tf.reduce_sum(tf.square(self._target), axis=[1, 2, 3]) 24 | 25 | error = self._target - self._output 26 | error_per_example = tf.reduce_sum(tf.square(error), axis=[1, 2, 3]) 27 | 28 | reconstruction_loss = 0.5 * tf.reduce_sum(error_per_example * (1 + 5 / (targetSquaredNorm+1e-4))) 29 | lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) * 1e-2 30 | total_loss = tf.add_n([reconstruction_loss, lossL2]) 31 | 32 | total_loss_summary = tf.summary.scalar("total_loss", total_loss) 33 | l2_loss_summary = tf.summary.scalar("lossL2", lossL2) 34 | rec_loss_summary = tf.summary.scalar("reconstruction_loss", reconstruction_loss) 35 | self._lossSummaries = tf.summary.merge([rec_loss_summary, l2_loss_summary, total_loss_summary]) 36 | 37 | return total_loss 38 | 39 | def _network(self, data): 40 | encodedData = self._encode(data) 41 | connectedData = self._fullyConnect(encodedData) 42 | decodedData = self._decode(connectedData) 43 | return decodedData 44 | 45 | def _encode(self, data): 46 | with tf.variable_scope("Encoder"): 47 | encoder = TFGraph(data, self._isTraining, "Encoder") 48 | 49 | encoder.addSeveralConvLayers(filter_shapes=self._encoderParams.filterShapes(), 50 | input_channels=self._encoderParams.inputChannels(), 51 | output_channels=self._encoderParams.outputChannels(), 52 | strides=self._encoderParams.strides(), 53 | names=self._encoderParams.convNames()) 54 | return encoder.output() 55 | 56 | def _fullyConnect(self, data): 57 | with tf.variable_scope("Fully"): 58 | fullyConnected = TFGraph(data, self._isTraining, "Fully") 59 | fullyConnected.addChannelWiseFullyConnectedLayer('Fully') 60 | fullyConnected.addRelu() 61 | fullyConnected.addBatchNormalization() 62 | fullyConnected.addReshape(self._fullyParams.outputShape()) 63 | return fullyConnected.output() 64 | 65 | def _decode(self, data): 66 | with tf.variable_scope("Decoder"): 67 | decoder = TFGraph(data, self._isTraining, "Decoder") 68 | 69 | decoder.addSeveralDeconvLayers(filter_shapes=self._decoderParams.filterShapes()[0:-2], 70 | input_channels=self._decoderParams.inputChannels()[0:-2], 71 | output_channels=self._decoderParams.outputChannels()[0:-2], 72 | strides=self._decoderParams.strides()[0:-2], 73 | 
names=self._decoderParams.convNames()[0:-2]) 74 | 75 | currentShape = decoder.outputShape() 76 | constantForReshape = int(4 * currentShape[1] / currentShape[2]) 77 | decoder.addReshape((currentShape[0], int(currentShape[1] / constantForReshape), 78 | currentShape[3], currentShape[2] * constantForReshape)) 79 | 80 | decoder.addDeconvLayer(filter_shape=self._decoderParams.filterShapes()[-2], 81 | input_channels=currentShape[2] * constantForReshape, 82 | output_channels=self._decoderParams.outputChannels()[-2], 83 | stride=self._decoderParams.strides()[-2], 84 | name=self._decoderParams.convNames()[-2]) 85 | decoder.addBatchNormalization() 86 | 87 | currentShape = decoder.outputShape() 88 | constantForReshape = int(self._decoderParams.strides()[-2][2]) 89 | 90 | decoder.addReshape((currentShape[0], currentShape[3], 91 | int(currentShape[2] / constantForReshape), 92 | currentShape[1] * constantForReshape)) 93 | 94 | decoder.addDeconvLayerWithoutNonLin(filter_shape=self._decoderParams.filterShapes()[-1], 95 | input_channels=currentShape[1] * constantForReshape, 96 | output_channels=self._decoderParams.outputChannels()[-1], 97 | stride=self._decoderParams.strides()[-1], 98 | name=self._decoderParams.convNames()[-1]) 99 | return decoder.output() 100 | -------------------------------------------------------------------------------- /architecture/contextEncoderArchitecture.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from architecture.architecture import Architecture 4 | from network.tfGraph import TFGraph 5 | 6 | __author__ = 'Andres' 7 | 8 | 9 | class ContextEncoderArchitecture(Architecture): 10 | def __init__(self, inputShape, encoderParams, decoderParams, fullyParams): 11 | with tf.variable_scope("ContextEncoderArchitecture"): 12 | self._inputShape = inputShape 13 | self._encoderParams = encoderParams 14 | self._decoderParams = decoderParams 15 | self._fullyParams = fullyParams 16 | super().__init__() 17 | 18 | def inputShape(self): 19 | return self._inputShape 20 | 21 | def _lossGraph(self): 22 | with tf.variable_scope("Loss"): 23 | targetSquaredNorm = tf.reduce_sum(tf.square(self._target), axis=[1, 2, 3]) 24 | 25 | error = self._target - self._output 26 | error_per_example = tf.reduce_sum(tf.square(error), axis=[1, 2, 3]) 27 | 28 | reconstruction_loss = 0.5 * tf.reduce_sum(error_per_example * (1 + 5 / (targetSquaredNorm+1e-4))) 29 | lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) * 1e-2 30 | total_loss = tf.add_n([reconstruction_loss, lossL2]) 31 | 32 | total_loss_summary = tf.summary.scalar("total_loss", total_loss) 33 | l2_loss_summary = tf.summary.scalar("lossL2", lossL2) 34 | rec_loss_summary = tf.summary.scalar("reconstruction_loss", reconstruction_loss) 35 | self._lossSummaries = tf.summary.merge([rec_loss_summary, l2_loss_summary, total_loss_summary]) 36 | 37 | return total_loss 38 | 39 | def _network(self, data): 40 | encodedData = self._encode(data) 41 | connectedData = self._fullyConnect(encodedData) 42 | decodedData = self._decode(connectedData) 43 | return decodedData 44 | 45 | def _encode(self, data): 46 | with tf.variable_scope("Encoder"): 47 | encoder = TFGraph(data, self._isTraining, "Encoder") 48 | 49 | encoder.addSeveralConvLayers(filter_shapes=self._encoderParams.filterShapes(), 50 | input_channels=self._encoderParams.inputChannels(), 51 | output_channels=self._encoderParams.outputChannels(), 52 | strides=self._encoderParams.strides(), 53 | 
names=self._encoderParams.convNames()) 54 | return encoder.output() 55 | 56 | def _fullyConnect(self, data): 57 | with tf.variable_scope("Fully"): 58 | fullyConnected = TFGraph(data, self._isTraining, "Fully") 59 | 60 | fullyConnected.addReshape((self._fullyParams.batchSize(), self._fullyParams.inputChannels())) 61 | fullyConnected.addFullyConnectedLayer(self._fullyParams.inputChannels(), 62 | self._fullyParams.outputChannels(), 63 | 'Fully') 64 | fullyConnected.addRelu() 65 | fullyConnected.addBatchNormalization() 66 | fullyConnected.addReshape(self._fullyParams.outputShape()) 67 | return fullyConnected.output() 68 | 69 | def _decode(self, data): 70 | with tf.variable_scope("Decoder"): 71 | decoder = TFGraph(data, self._isTraining, "Decoder") 72 | 73 | decoder.addSeveralDeconvLayers(filter_shapes=self._decoderParams.filterShapes()[0:-2], 74 | input_channels=self._decoderParams.inputChannels()[0:-2], 75 | output_channels=self._decoderParams.outputChannels()[0:-2], 76 | strides=self._decoderParams.strides()[0:-2], 77 | names=self._decoderParams.convNames()[0:-2]) 78 | 79 | currentShape = decoder.outputShape() 80 | constantForReshape = int(4 * currentShape[1] / currentShape[2]) 81 | decoder.addReshape((currentShape[0], int(currentShape[1] / constantForReshape), 82 | currentShape[3], currentShape[2] * constantForReshape)) 83 | 84 | decoder.addDeconvLayer(filter_shape=self._decoderParams.filterShapes()[-2], 85 | input_channels=currentShape[2] * constantForReshape, 86 | output_channels=self._decoderParams.outputChannels()[-2], 87 | stride=self._decoderParams.strides()[-2], 88 | name=self._decoderParams.convNames()[-2]) 89 | decoder.addBatchNormalization() 90 | 91 | currentShape = decoder.outputShape() 92 | constantForReshape = int(self._decoderParams.strides()[-2][2]) 93 | 94 | decoder.addReshape((currentShape[0], currentShape[3], 95 | int(currentShape[2] / constantForReshape), 96 | currentShape[1] * constantForReshape)) 97 | 98 | decoder.addDeconvLayerWithoutNonLin(filter_shape=self._decoderParams.filterShapes()[-1], 99 | input_channels=currentShape[1] * constantForReshape, 100 | output_channels=self._decoderParams.outputChannels()[-1], 101 | stride=self._decoderParams.strides()[-1], 102 | name=self._decoderParams.convNames()[-1]) 103 | return decoder.output() 104 | -------------------------------------------------------------------------------- /utils/legacy/stftGapContextEncoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from utils.legacy.contextEncoder import ContextEncoderNetwork 7 | from utils.strechableNumpyArray import StrechableNumpyArray 8 | 9 | __author__ = 'Andres' 10 | 11 | 12 | class StftGapContextEncoder(ContextEncoderNetwork): 13 | def __init__(self, model, batch_size, target_model, window_size, gap_length, learning_rate, name): 14 | self._target_model = target_model 15 | super(StftGapContextEncoder, self).__init__(model, batch_size, window_size, gap_length, learning_rate, 16 | name) 17 | self._sides = tf.placeholder(tf.float32, shape=(batch_size, self._window_size - self._gap_length), name='sides') 18 | self._reconstructedSignal = self._reconstructSignal(self._sides, self.gap_data) 19 | 20 | def trainSNR(self): 21 | return tf.reduce_mean(self._pavlovs_SNR(self._target_model.output(), self._reconstructed_input_data, 22 | onAxis=[1, 2, 3])) 23 | 24 | def _reconstructSignal(self, sides, gaps): 25 | signal_length = self._window_size - self._gap_length 26 | 
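# 'sides' concatenates the left and right context samples (window minus gap),
# so splitting it in half recovers each context before the gap is spliced back in.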
first_half = sides[:, :signal_length // 2] 27 | second_half = sides[:, signal_length // 2:] 28 | 29 | reconstructed_signal = tf.concat([first_half, gaps, second_half], axis=1) 30 | return reconstructed_signal 31 | 32 | def _loss_graph(self): 33 | with tf.variable_scope("Loss"): 34 | gap_stft = self._target_model.output() 35 | 36 | norm_orig = self._squaredEuclideanNorm(gap_stft, onAxis=[1, 2, 3]) 37 | norm_orig_summary = tf.summary.scalar("norm_orig", tf.reduce_min(norm_orig)) 38 | 39 | error = gap_stft - self._reconstructed_input_data 40 | # Nati comment: here you should use only one reduce sum function 41 | error_per_example = tf.reduce_sum(tf.square(error), axis=[1, 2, 3]) 42 | 43 | reconstruction_loss = 0.5 * tf.reduce_sum(error_per_example * (1 + 5 / (norm_orig+1e-2))) 44 | 45 | rec_loss_summary = tf.summary.scalar("reconstruction_loss", reconstruction_loss) 46 | 47 | trainable_vars = tf.trainable_variables() 48 | lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars if 'bias' not in v.name]) * 1e-2 49 | l2_loss_summary = tf.summary.scalar("lossL2", lossL2) 50 | 51 | total_loss = tf.add_n([reconstruction_loss, lossL2]) 52 | total_loss_summary = tf.summary.scalar("total_loss", total_loss) 53 | 54 | self._lossSummaries = tf.summary.merge([rec_loss_summary, l2_loss_summary, norm_orig_summary, total_loss_summary]) 55 | 56 | return total_loss 57 | 58 | def reconstructAudio(self, audios, model_num=None, max_batchs=200): 59 | with tf.Session() as sess: 60 | if model_num is not None: 61 | path = self.modelsPath(model_num) 62 | else: 63 | path = self.modelsPath(self._initial_model_num) 64 | saver = tf.train.Saver() 65 | saver.restore(sess, path) 66 | print("Model restored.") 67 | 68 | batches_count = int(len(audios) / self._batch_size) 69 | 70 | reconstructed = StrechableNumpyArray() 71 | for batch_num in range(min(batches_count, max_batchs)): 72 | batch_data = audios[batch_num * self._batch_size:batch_num * self._batch_size + self._batch_size] 73 | feed_dict = {self._model.input(): batch_data, self._model.isTraining(): False} 74 | reconstructed_input = sess.run([self._reconstructed_input_data], 75 | feed_dict=feed_dict) 76 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 77 | reconstructed = reconstructed.finalize() 78 | output_shape = self._target_model.output().shape.as_list() 79 | output_shape[0] = -1 80 | reconstructed_stft = np.reshape(reconstructed, output_shape) 81 | return reconstructed_stft 82 | 83 | def _reconstruct(self, sess, data_reader, max_steps): 84 | data_reader.start() 85 | reconstructed = StrechableNumpyArray() 86 | out_gaps = StrechableNumpyArray() 87 | for batch_num in range(max_steps): 88 | try: 89 | sides, gaps = data_reader.dataOperation(session=sess) 90 | except StopIteration: 91 | print(batch_num) 92 | print("rec End of queue!") 93 | break 94 | reconstructed_signal = sess.run(self._reconstructedSignal, 95 | feed_dict={self._sides: sides, self.gap_data: gaps}) 96 | gap_stft = self._target_model.output() 97 | 98 | feed_dict = {self._model.input(): reconstructed_signal, self._target_model.input(): reconstructed_signal, 99 | self._model.isTraining(): False} 100 | reconstructed_input, original = sess.run([self._reconstructed_input_data, gap_stft], feed_dict=feed_dict) 101 | out_gaps.append(np.reshape(original, (-1))) 102 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 103 | 104 | output_shape = self._target_model.output().shape.as_list() 105 | output_shape[0] = -1 106 | reconstructed = reconstructed.finalize() 107 | reconstructed = 
np.reshape(reconstructed, output_shape) 108 | out_gaps = out_gaps.finalize() 109 | out_gaps = np.reshape(out_gaps, output_shape) 110 | 111 | data_reader.finish() 112 | 113 | return reconstructed, out_gaps 114 | 115 | def _evaluateValidSNR(self, summaries_dict, validReader, evalWriter, writer, sess, step): 116 | reconstructed, out_gaps = self._reconstruct(sess, validReader, max_steps=8) 117 | step_valid_SNR = evalWriter.evaluateImages(reconstructed, out_gaps, self._initial_model_num + step) 118 | validSNRSummaryToWrite = sess.run(summaries_dict['valid_SNR_summary'], 119 | feed_dict={summaries_dict['valid_SNR']: step_valid_SNR}) 120 | writer.add_summary(validSNRSummaryToWrite, self._initial_model_num + step) 121 | 122 | def _evaluatePlotSummary(self, plot_summary, gaps, feed_dict, writer, sess, step): 123 | pass 124 | 125 | def _trainingFeedDict(self, sides, gaps, sess): 126 | rec = sess.run(self._reconstructedSignal, feed_dict={self._sides: sides, self.gap_data: gaps}) 127 | return {self._model.input(): rec, self._target_model.input(): rec, self._model.isTraining(): True} 128 | 129 | 130 | def get_trailing_number(s): 131 | m = re.search(r'\d+$', s) 132 | return int(m.group()) if m else None 133 | -------------------------------------------------------------------------------- /utils/legacy/notebooks/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "scrolled": false 7 | }, 8 | "source": [ 9 | "# Context Encoder \n", 10 | "\n", 11 | "Let's begin by importing tensorflow and the network" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import tensorflow as tf\n", 21 | "from utils.legacy.contextEncoder import ContextEncoderNetwork" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Now we initialize the context encoder network and select the step we want to use for the reconstruction."
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "tf.reset_default_graph()\n", 38 | "\n", 39 | "train_filename = 'train_full_w5120_g1024_h512_19404621.tfrecords'\n", 40 | "valid_filename = 'valid_full_w5120_g1024_h512_ex913967.tfrecords'\n", 41 | "\n", 42 | "aContextEncoderNetwork = ContextEncoderNetwork(batch_size=256, window_size=5120, gap_length=1024, \n", 43 | " learning_rate=1e-5, name='test')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "best_step = 506000 " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "from utils.legacy.evaluationWriter import EvaluationWriter\n", "\n", "test_filename = 'test_full_w5120_g1024_h512_ex298385.tfrecords'\n", 62 | "reconstructed, out_gaps = aContextEncoderNetwork.reconstruct(test_filename, best_step, max_steps=248)\n", 63 | "\n", 64 | "evaluator = EvaluationWriter(aContextEncoderNetwork._name + str(best_step) + '_test.xlsx')\n", 65 | "evaluator.evaluate(reconstructed, out_gaps, best_step)\n", 66 | "evaluator.save()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import pylab\n", "import matplotlib.pyplot as plt\n", "from matplotlib.backends.backend_pdf import PdfPages\n", 76 | "pp = PdfPages(aContextEncoderNetwork._name + '_' + str(best_step) + '_test.pdf')\n", 77 | "\n", 78 | "pylab.rcParams['figure.figsize'] = (14, 28)\n", 79 | "f, axarr = plt.subplots(8, 2, sharey='row')\n", 80 | "\n", 81 | "stop_value = min(256, len(out_gaps)+1)\n", 82 | "for i in range(0, stop_value): \n", 83 | " if i != 0 and i % 8 == 0:\n", 84 | " pp.savefig()\n", 85 | " f, axarr = plt.subplots(8, 2, sharey='row')\n", 86 | " axarr[i%8, 0].plot(out_gaps[i%256])\n", 87 | " axarr[i%8, 1].plot(reconstructed[i%256]) \n", 88 | " \n", 89 | "pp.savefig()\n", 90 | "pp.close()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "import numpy as np\n", "\n", "def _pavlovs_SNR(y_orig, y_inp):\n", 100 | " norm_y_orig = np.linalg.norm(y_orig) + 1e-10\n", 101 | " norm_y_orig_minus_y_inp = np.linalg.norm(y_orig - y_inp)\n", 102 | " return 10 * np.log10((abs(norm_y_orig ** 2)) / abs((norm_y_orig_minus_y_inp ** 2)))\n", 103 | "\n", 104 | "def _euclideanNorm(vector):\n", 105 | " squared = np.square(vector)\n", 106 | " summed = np.sum(squared, axis=1)\n", 107 | " return np.sqrt(summed + 1e-10)\n", 108 | " \n", 109 | "fake_a = (reconstructed - 0.5) * 2\n", 110 | "gap = (out_gaps - 0.5) * 2\n", 111 | "\n", 112 | "SNRs = np.zeros((len(fake_a),))\n", 113 | "for index, signal in enumerate(fake_a):\n", 114 | " SNRs[index] = _pavlovs_SNR(gap[index], fake_a[index])\n", 115 | "\n", 116 | "norm_orig = _euclideanNorm(gap)\n", 117 | "error = (gap - fake_a)\n", 118 | "reconstruction_loss = 0.5 * np.sum(np.square(error), axis=1) * (1 + 1 / norm_orig)\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "pylab.rcParams['figure.figsize'] = (14, 8)\n", 128 | "\n", 129 | "import scipy.stats as stats\n", 130 | "sorted_SNR = sorted(SNRs)\n", 131 | "\n", 132 | "fit = stats.norm.pdf(sorted_SNR, np.mean(sorted_SNR), np.std(sorted_SNR)) #this is a fitting indeed\n", 133 | "\n", 134 | "plt.plot(sorted_SNR,fit,'-o')\n", 135 | "\n", 136 | "plt.hist(sorted_SNR, 50, normed=True) \n" 137 | ] 138 | }, 139 | { 140 | "cell_type": 
"code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "plt.scatter(SNRs, reconstruction_loss)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "pylab.rcParams['figure.figsize'] = (14, 28)\n", 155 | "\n", 156 | "examples = np.where((SNRs<-10) & (reconstruction_loss<5))[0]\n", 157 | "\n", 158 | "f, axarr = plt.subplots(8, 2, sharey='row')\n", 159 | "\n", 160 | "for index, example in enumerate(examples): \n", 161 | " if index is not 0 and index%8 is 0:\n", 162 | " f, axarr = plt.subplots(8, 2, sharey='row')\n", 163 | " axarr[index%8, 0].plot(out_gaps[example])\n", 164 | " axarr[index%8, 1].plot(reconstructed[example]) \n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "examples = np.where((SNRs<1) & (SNRs>-1))[0][:256]\n", 174 | "\n", 175 | "f, axarr = plt.subplots(8, 2, sharey='row')\n", 176 | "\n", 177 | "for index, example in enumerate(examples): \n", 178 | " if index is not 0 and index%8 is 0:\n", 179 | " f, axarr = plt.subplots(8, 2, sharey='row')\n", 180 | " axarr[index%8, 0].plot(out_gaps[example])\n", 181 | " axarr[index%8, 1].plot(reconstructed[example]) \n", 182 | " " 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "examples = np.where((SNRs>28))[0][:256]\n", 192 | "\n", 193 | "f, axarr = plt.subplots(8, 2, sharey='row')\n", 194 | "\n", 195 | "for index, example in enumerate(examples): \n", 196 | " if index%8 == 0:\n", 197 | " f, axarr = plt.subplots(8, 2, sharey='row')\n", 198 | " axarr[index%8, 0].plot(out_gaps[example])\n", 199 | " axarr[index%8, 1].plot(reconstructed[example]) \n", 200 | " " 201 | ] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.6.2" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /SpecDivExperimentMag.m: -------------------------------------------------------------------------------- 1 | 2 | %% STFT parameters 3 | 4 | win = {'hann',512,'peak'}; 5 | dual = {'dual',win}; 6 | M = 512; a = M/4; 7 | flag = 'timeinv'; 8 | gamma = pghi_findgamma(win); 9 | 10 | %% Obtain data - THIS MUST BE UPDATED ONCE STUFF IS AVAILABLE! 
11 | 12 | 13 | % load('magnitude_trainedOnFma_step723261_8mslater.mat') 14 | % tfdata_amp = magnitudeMat; 15 | % clear magnitudeMat; 16 | % load('magnitude_trainedOnFma_step723261_8msbefore.mat') 17 | % t = linspace(0, pi/2, 7)'; 18 | % sqCos = permute(repmat(cos(t).^2, 1, 3328, 257), [2 1 3]); 19 | % tfdata_amp = [magnitudeMat(:, 3:4, :) (tfdata_amp(:, 1:end-4, :).*sqCos+fliplr(sqCos).*magnitudeMat(:, 5:end, :)) tfdata_amp(:, end-3:end-2, :)]; 20 | % clear magnitudeMat; 21 | % 22 | 23 | load('magnitude_trainedOnFma_step723261_8mslater.mat') 24 | later = magnitudeMat; 25 | clear magnitudeMat; 26 | load('magnitude_trainedOnFma_step723261_8msbefore.mat') 27 | before = magnitudeMat; 28 | clear magnitudeMat; 29 | load('magnitude_trainedOnFma_step723261.mat') 30 | central = magnitudeMat; 31 | clear magnitudeMat; 32 | 33 | tfdata_amp = [(before(:, 3:4, :)+central(:, 1:2, :))/2 (later(:, 1:end-4, :)+central(:, 3:end-2,:)+before(:, 5:end, :))/3 (central(:, end-1:end,:)+later(:, end-3:end-2, :))/2]; 34 | % tfdata_amp = central; 35 | 36 | load('FMA_test_windows_16k.mat'); 37 | alldata_ori = fma_test(1:length(tfdata_amp),5121:5120*2).'; 38 | clear fma_test; 39 | 40 | load('CE_FMAonly_step2547124.mat'); 41 | alldata_rim = CEMat(1:length(tfdata_amp),:).'; 42 | clear CEMat; 43 | 44 | load('fma_lpcrec_16k.mat') 45 | alldata_lpc = out(:,:).'; 46 | clear out; 47 | 48 | %num_data = 10; 49 | num_data = size(alldata_ori,2); 50 | num_methods = 10; 51 | 52 | L = 5120; 53 | 54 | num_tframes = 40; 55 | num_unknown = 11; 56 | 57 | %% Prepare arrays for results 58 | 59 | SpecDiv = zeros(num_data,num_methods); 60 | SNR = zeros(num_data,num_methods); 61 | 62 | mask = zeros(M/2+1,L/a); 63 | mask(:,[1:15,end-13:end]) = 1; 64 | 65 | known_idx = [1:15,num_tframes-13:num_tframes]; 66 | idx = 19:(num_tframes-17); 67 | idx = 20:(num_tframes-18); % for 48ms 68 | %idx = 1:40; 69 | 70 | known_tidx = [1:(14*a),(L-12*a)+1:L]; 71 | tidx = (16*a)+1:(L-16*a); 72 | %tidx = 1:L; 73 | 74 | %% Compute error measures 75 | 76 | for kk = 1:num_data 77 | % Load waveform data 78 | data_ori = alldata_ori(:,kk); 79 | data_ori_nomean = data_ori;% - mean(data_ori); 80 | data_lpc = alldata_lpc(:,kk); 81 | data_rim = alldata_rim(:,kk); 82 | 83 | c_ori = dgtreal(data_ori,win,a,M,L,flag); % DGT of original 84 | c_angle_ori = angle(c_ori); % Phase of original DGT 85 | 86 | % LPC 87 | % SNR(kk,1) = 20*log10(norm(data_ori_nomean(tidx))/norm(data_lpc(tidx)-data_ori(tidx))); 88 | % c_lpc = dgtreal(data_lpc,win,a,M,L,flag); 89 | % SpecDiv(kk,1) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_lpc(:,idx))); 90 | 91 | % Real and Imag 92 | % SNR(kk,2) = 20*log10(norm(data_ori_nomean(tidx))/norm(data_rim(tidx)-data_ori(tidx))); 93 | c_rim = dgtreal(data_rim,win,a,M,L,flag); % kept active: c_rim is needed by the masked_gla call below 94 | % SpecDiv(kk,2) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_rim(:,idx))); 95 | % % Real and Imag + Original Phase 96 | % c_rim_tp = abs(c_rim).*exp(1i*c_angle_ori); 97 | % f_rim_tp = idgtreal(c_rim_tp,dual,a,M,flag); 98 | % SNR(kk,3) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_rim_tp(tidx)-data_ori(tidx))); 99 | % c_rim_rec = dgtreal(f_rim_tp,win,a,M,L,flag); 100 | % SpecDiv(kk,3) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_rim_rec(:,idx))); 101 | % % Real and Imag + PGHI 102 | % c_rim_pghi = pghi(c_rim,pghi_findgamma(win),a,M,mask,flag); 103 | % f_rim_pghi = idgtreal(c_rim_pghi,dual,a,M,flag); 104 | % SNR(kk,4) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_rim_pghi(tidx)-data_ori(tidx))); 105 | % c_rim_pghi_rec = dgtreal(f_rim_pghi,win,a,M,L,flag); 106 | % SpecDiv(kk,4) = 
20*log10(1/magnitudeerr(c_ori(:,idx),c_rim_pghi_rec(:,idx))); 107 | % % Real and Imag + FGLIM 108 | c_rim_gla = masked_gla(c_rim,dual,a,M,mask,flag,'fgla','input'); 109 | f_rim_gla = idgtreal(c_rim_gla,dual,a,M,flag); 110 | SNR(kk,5) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_rim_gla(tidx)-data_ori(tidx))); 111 | c_rim_gla_rec = dgtreal(f_rim_gla,win,a,M,L,flag); 112 | SpecDiv(kk,5) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_rim_gla_rec(:,idx))); 113 | % % Real and Imag + PGHI + FGLIM 114 | % c_rim_pgla = gla(c_rim_pghi,dual,a,M,flag,'fgla','input'); 115 | % SpecDiv(kk,6) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_rim_pgla(:,idx))); 116 | % f_rim_pgla = idgtreal(c_rim_pgla,dual,a,M,flag); 117 | % SNR(kk,6) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_rim_pgla(tidx)-data_ori(tidx))); 118 | % c_rim_pgla_rec = dgtreal(f_rim_pgla,win,a,M,L,flag); 119 | % SpecDiv(kk,6) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_rim_pgla_rec(:,idx))); 120 | 121 | % Amplitude (original phase) 122 | c_amp = abs(c_ori); % Initialize magnitude 123 | c_amp(:,16:(num_tframes-14)) = squeeze(tfdata_amp(kk,:,:)).'; % Set inner part to proposed solution 124 | c_amp_tp = abs(c_amp).*exp(1i*c_angle_ori); 125 | f_amp_tp = idgtreal(c_amp_tp,dual,a,M,flag); 126 | SNR(kk,7) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_amp_tp(tidx)-data_ori(tidx))); 127 | c_amp_rec = dgtreal(f_amp_tp,win,a,M,L,flag); 128 | SpecDiv(kk,7) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_amp_rec(:,idx))); 129 | % Amplitude + PGHI 130 | kphase = (c_angle_ori.*mask);%+2*pi*rand(M/2+1,num_tframes).*(1-mask)); 131 | c_amp_kphase = c_amp.*exp(1i*kphase); 132 | c_amp_pghi = pghi(c_amp_kphase,gamma,a,M,mask,flag); 133 | f_amp_pghi = idgtreal(c_amp_pghi,dual,a,M,flag); 134 | SNR(kk,8) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_amp_pghi(tidx)-data_ori(tidx))); 135 | c_amp_pghi_rec = dgtreal(f_amp_pghi,win,a,M,L,flag); 136 | SpecDiv(kk,8) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_amp_pghi_rec(:,idx))); 137 | % Amplitude + FGLIM 138 | % c_amp_gla = masked_gla(c_amp_kphase,dual,a,M, mask,flag,'fgla','input'); 139 | % f_amp_gla = idgtreal(c_amp_gla,dual,a,M,flag); 140 | % SNR(kk,9) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_amp_gla(tidx)-data_ori(tidx))); 141 | % c_amp_gla_rec = dgtreal(f_amp_gla,win,a,M,L,flag); 142 | % SpecDiv(kk,9) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_amp_gla_rec(:,idx))); 143 | % Amplitude + PGHI + FGLIM 144 | c_amp_pgla = masked_gla(c_amp_pghi,dual,a,M, mask,flag,'fgla','input'); 145 | f_amp_pgla = idgtreal(c_amp_pgla,dual,a,M,flag); 146 | SNR(kk,10) = 20*log10(norm(data_ori_nomean(tidx))/norm(f_amp_pgla(tidx)-data_ori(tidx))); 147 | c_amp_pgla_rec = dgtreal(f_amp_pgla,win,a,M,L,flag); 148 | SpecDiv(kk,10) = 20*log10(1/magnitudeerr(c_ori(:,idx),c_amp_pgla_rec(:,idx))); 149 | 150 | if mod(kk,200) == 0 151 | fprintf('-Iteration %d-',kk); 152 | end 153 | end 154 | 155 | maxSNR = max(SNR); 156 | stdSNR = std(SNR); 157 | minSNR = min(SNR); 158 | meanSNR = mean(SNR); 159 | medianSNR = median(SNR); 160 | quant25SNR = quantile(SNR,0.25); 161 | quant75SNR = quantile(SNR,0.75); 162 | 163 | maxSpecDiv= max(SpecDiv); 164 | stdSpecDiv = std(SpecDiv); 165 | minSpecDiv = min(SpecDiv); 166 | meanSpecDiv = mean(SpecDiv); 167 | medianSpecDiv = median(SpecDiv); 168 | quant25SpecDiv = quantile(SpecDiv,0.25); 169 | quant75SpecDiv = quantile(SpecDiv,0.75); 170 | 171 | save('MethodComparison.mat','meanSNR','minSNR','quant25SNR','medianSNR',... 172 | 'quant75SNR','maxSNR','meanSpecDiv','minSpecDiv','quant25SpecDiv',... 
173 | 'medianSpecDiv','quant75SpecDiv','maxSpecDiv'); 174 | 175 | SNRstatsFMA = [meanSNR;stdSNR;minSNR;quant25SNR;medianSNR;quant75SNR;maxSNR]; 176 | SpecDivstatsFMA = [meanSpecDiv;stdSpecDiv;minSpecDiv;quant25SpecDiv;medianSpecDiv;quant75SpecDiv;maxSpecDiv]; 177 | 178 | save('StatsFMA.mat','SNRstatsFMA','SpecDivstatsFMA'); 179 | -------------------------------------------------------------------------------- /system/contextEncoderSystem.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from system.dnnSystem import DNNSystem 4 | from utils.colorize import colorize 5 | from utils.strechableNumpyArray import StrechableNumpyArray 6 | from utils.tfReader import TFReader 7 | 8 | __author__ = 'Andres' 9 | 10 | 11 | class ContextEncoderSystem(DNNSystem): 12 | def __init__(self, architecture, batchSize, aPreProcessor, name): 13 | self._windowSize = aPreProcessor.signalLength() 14 | self._batchSize = batchSize 15 | self._audio = tf.placeholder(tf.float32, shape=(batchSize, self._windowSize), name='audio_data') 16 | self._preProcessForGap = aPreProcessor.stftForGapOf(self._audio) 17 | self._preProcessForContext = aPreProcessor.stftForTheContextOf(self._audio) 18 | super().__init__(architecture, name) 19 | self._SNR = tf.reduce_mean(self._pavlovs_SNR(self._architecture.output(), self._architecture.target())) 20 | 21 | def optimizer(self, learningRate): 22 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 23 | with tf.control_dependencies(update_ops): 24 | return tf.train.AdamOptimizer(learning_rate=learningRate).minimize(self._architecture.loss()) 25 | 26 | def _feedDict(self, data, sess, isTraining=True): 27 | net_input, net_target = sess.run([self._preProcessForContext, self._preProcessForGap], feed_dict={self._audio: data}) 28 | return {self._architecture.input(): net_input, self._architecture.target(): net_target, 29 | self._architecture.isTraining(): isTraining} 30 | 31 | def reconstructAudio(self, aBatchOfSignals, model_num, max_steps=200): 32 | with tf.Session() as sess: 33 | path = self.modelsPath(model_num) 34 | saver = tf.train.Saver() 35 | saver.restore(sess, path) 36 | print("Model restored.") 37 | sess.run([tf.local_variables_initializer()]) 38 | reconstructed = StrechableNumpyArray() 39 | out_gaps = StrechableNumpyArray() 40 | input_shape = list(self._architecture.inputShape()) 41 | input_shape[0] = 0 42 | contexts = np.empty(input_shape) 43 | 44 | for batch_num in range(min(int(len(aBatchOfSignals)/self._batchSize), max_steps)): 45 | feed_dict = self._feedDict( 46 | aBatchOfSignals[batch_num * self._batchSize:(batch_num + 1) * self._batchSize], sess, False) 47 | reconstructed_input, original, context = sess.run( 48 | [self._architecture.output(), self._architecture.target(), 49 | self._architecture.input()], 50 | feed_dict=feed_dict) 51 | out_gaps.append(np.reshape(original, (-1))) 52 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 53 | contexts = np.concatenate([contexts, context], axis=0) 54 | 55 | output_shape = self._architecture.output().shape.as_list() 56 | output_shape[0] = -1 57 | reconstructed = reconstructed.finalize() 58 | reconstructed = np.reshape(reconstructed, output_shape) 59 | out_gaps = out_gaps.finalize() 60 | out_gaps = np.reshape(out_gaps, output_shape) 61 | 62 | return reconstructed, out_gaps, contexts 63 | 64 | def reconstruct(self, data_path, model_num, max_steps=200): 65 | with tf.Session() as sess: 66 | reader = self._loadReader(data_path) 67 | path 
= self.modelsPath(model_num) 68 | saver = tf.train.Saver() 69 | saver.restore(sess, path) 70 | print("Model restored.") 71 | sess.run([tf.local_variables_initializer()]) 72 | return self._reconstruct(sess, reader, max_steps) 73 | 74 | def _reconstruct(self, sess, data_reader, max_steps): 75 | data_reader.start() 76 | reconstructed = StrechableNumpyArray() 77 | out_gaps = StrechableNumpyArray() 78 | input_shape = list(self._architecture.inputShape()) 79 | input_shape[0] = 0 80 | contexts = np.empty(input_shape) 81 | 82 | for batch_num in range(max_steps): 83 | try: 84 | audio = data_reader.dataOperation(session=sess) 85 | except StopIteration: 86 | print("rec End of queue!", batch_num) 87 | break 88 | 89 | feed_dict = self._feedDict(audio, sess, False) 90 | reconstructed_input, original, context = sess.run([self._architecture.output(), self._architecture.target(), 91 | self._architecture.input()], 92 | feed_dict=feed_dict) 93 | out_gaps.append(np.reshape(original, (-1))) 94 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 95 | contexts = np.concatenate([contexts, context], axis=0) 96 | 97 | output_shape = self._architecture.output().shape.as_list() 98 | output_shape[0] = -1 99 | reconstructed = reconstructed.finalize() 100 | reconstructed = np.reshape(reconstructed, output_shape) 101 | out_gaps = out_gaps.finalize() 102 | out_gaps = np.reshape(out_gaps, output_shape) 103 | 104 | data_reader.finish() 105 | 106 | return reconstructed, out_gaps, contexts 107 | 108 | def _evaluate(self, summariesDict, feed_dict, validReader, sess): 109 | trainSNRSummaryToWrite = sess.run(summariesDict['train_SNR_summary'], feed_dict=feed_dict) 110 | 111 | try: 112 | audio = validReader.dataOperation(session=sess) 113 | except StopIteration: 114 | print("valid End of queue!") 115 | return [trainSNRSummaryToWrite] 116 | feed_dict = self._feedDict(audio, sess, False) 117 | validSNRSummary = sess.run(summariesDict['valid_SNR_summary'], feed_dict) 118 | imageSummary = sess.run(summariesDict['image_summaries'], feed_dict) 119 | 120 | return [trainSNRSummaryToWrite, validSNRSummary, imageSummary] 121 | 122 | def _loadReader(self, dataPath): 123 | return TFReader(dataPath, self._windowSize, batchSize=self._batchSize, capacity=int(2e5), num_epochs=400) 124 | 125 | def _evaluationSummaries(self): 126 | summaries_dict = {'train_SNR_summary': tf.summary.scalar("training_SNR", self._SNR), 127 | 'valid_SNR_summary': tf.summary.scalar("validation_SNR", self._SNR), 128 | 'image_summaries': self._spectrogramImageSummary()} 129 | return summaries_dict 130 | 131 | def _squaredEuclideanNorm(self, tensor, onAxis=[1, 2, 3]): 132 | squared = tf.square(tensor) 133 | summed = tf.reduce_sum(squared, axis=onAxis) 134 | return summed 135 | 136 | def _log10(self, tensor): 137 | numerator = tf.log(tensor) 138 | denominator = tf.log(tf.constant(10, dtype=numerator.dtype)) 139 | return numerator / denominator 140 | 141 | def _pavlovs_SNR(self, y_orig, y_inp, onAxis=[1, 2, 3]): 142 | norm_y_orig = self._squaredEuclideanNorm(y_orig, onAxis) 143 | norm_y_orig_minus_y_inp = self._squaredEuclideanNorm(y_orig - y_inp, onAxis) 144 | return 10 * self._log10(norm_y_orig / norm_y_orig_minus_y_inp) 145 | 146 | def _spectrogramImageSummary(self): 147 | complexOutput = self._architecture.output()[0] 148 | outputSpectrogram = tf.sqrt(tf.reduce_sum(tf.square(complexOutput), axis=-1)) 149 | 150 | complexTarget = self._architecture.target()[0] 151 | targetSpectrogram = tf.sqrt(tf.reduce_sum(tf.square(complexTarget), axis=-1)) 152 | 153 | 
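# The architecture input stacks the context STFT as four channels: real/imag of
# the left context in channels 0:2 and real/imag of the right context in 2:4.
# Each magnitude below is sqrt(Re^2 + Im^2) over the corresponding channel pair.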
complexLeft = self._architecture.input()[0, :, :, 0:2] 154 | leftSpectrogram = tf.sqrt(tf.reduce_sum(tf.square(complexLeft), axis=-1)) 155 | 156 | complexRight = self._architecture.input()[0, :, :, 2:4] 157 | rightSpectrogram = tf.sqrt(tf.reduce_sum(tf.square(complexRight), axis=-1)) 158 | 159 | totalSpectrogram = tf.transpose(tf.concat([leftSpectrogram, outputSpectrogram, 160 | rightSpectrogram], axis=0)) 161 | 162 | return tf.summary.merge([tf.summary.image("Original", [colorize(tf.transpose(targetSpectrogram))]), 163 | tf.summary.image("Generated", [colorize(tf.transpose(outputSpectrogram))]), 164 | tf.summary.image("Complete", [colorize(totalSpectrogram)])]) 165 | -------------------------------------------------------------------------------- /utils/test/ftest_stftForTheInpaintingSetting.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from system.preAndPostProcessor import PreAndPostProcessor 7 | 8 | __author__ = 'Andres' 9 | 10 | 11 | class TestStftForTheContextEncoder(TestCase): 12 | def setUp(self): 13 | self.signal_length = 5120 14 | self.gap_length = 1024 15 | self.fft_window_length = 512 16 | self.fft_hop_size = 128 17 | 18 | self.anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=self.signal_length, 19 | gapLength=self.gap_length, 20 | fftWindowLength=self.fft_window_length, 21 | fftHopSize=self.fft_hop_size) 22 | 23 | def test01TheStftTakesTheInpaintingParametersAsInput(self): 24 | self.assertEquals(self.anStftForTheInpaintingSetting.signalLength(), self.signal_length) 25 | self.assertEquals(self.anStftForTheInpaintingSetting.gapLength(), self.gap_length) 26 | self.assertEquals(self.anStftForTheInpaintingSetting.fftWindowLenght(), self.fft_window_length) 27 | self.assertEquals(self.anStftForTheInpaintingSetting.fftHopSize(), self.fft_hop_size) 28 | 29 | def test02TheStftKnowsHowMuchPaddingItShouldApply(self): 30 | self.assertEquals(self.anStftForTheInpaintingSetting.padding(), self.fft_window_length-self.fft_hop_size) 31 | 32 | fft_window_length = 1024 33 | fft_hop_size = 128 34 | anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=self.signal_length, 35 | gapLength=self.gap_length, 36 | fftWindowLength=fft_window_length, 37 | fftHopSize=fft_hop_size) 38 | self.assertEquals(anStftForTheInpaintingSetting.padding(), fft_window_length - fft_hop_size) 39 | 40 | fft_window_length = 1024 41 | fft_hop_size = 256 42 | anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=self.signal_length, 43 | gapLength=self.gap_length, 44 | fftWindowLength=fft_window_length, 45 | fftHopSize=fft_hop_size) 46 | self.assertEquals(anStftForTheInpaintingSetting.padding(), fft_window_length - fft_hop_size) 47 | 48 | def test03TheStftKnowsWhatSignalItShouldTakeForTheSTFTOfTheGap(self): 49 | fake_batch_of_signal = np.array([np.arange(self.signal_length)]) 50 | produced_signal = self.anStftForTheInpaintingSetting._removeExtraSidesForSTFTOfGap(fake_batch_of_signal) 51 | 52 | gap_begins = (self.signal_length-self.gap_length)//2 53 | gap_ends = gap_begins + self.gap_length 54 | padding = self.fft_window_length-self.fft_hop_size 55 | 56 | np.testing.assert_almost_equal(fake_batch_of_signal[:, gap_begins - padding:gap_ends + padding], produced_signal) 57 | 58 | fft_window_length = 128 59 | fft_hop_size = 32 60 | 61 | anStftForTheInpaintingSetting = PreAndPostProcessor(signalLength=self.signal_length, 62 | gapLength=self.gap_length, 63 | 
fftWindowLength=fft_window_length, 64 | fftHopSize=fft_hop_size) 65 | produced_signal = anStftForTheInpaintingSetting._removeExtraSidesForSTFTOfGap(fake_batch_of_signal) 66 | padding = fft_window_length - fft_hop_size 67 | np.testing.assert_almost_equal(fake_batch_of_signal[:, gap_begins - padding:gap_ends + padding], produced_signal) 68 | 69 | def test04TheStftProducesAnSTFTOfTheExpectedShapeForTheGap(self): 70 | batch_size = 32 71 | aBatchOfSignals = tf.placeholder(tf.float32, shape=(batch_size, self.signal_length), name='input_data') 72 | aStft = self.anStftForTheInpaintingSetting.stftForGapOf(aBatchOfSignals) 73 | 74 | framesOnGap = (((self.gap_length + self.anStftForTheInpaintingSetting.padding()*2)-self.fft_window_length)/ 75 | self.fft_hop_size)+1 76 | binsPerFrame = self.fft_window_length//2+1 77 | realAndImagChannels = 2 78 | self.assertEquals(aStft.shape.as_list(), [32, framesOnGap, binsPerFrame, realAndImagChannels]) 79 | 80 | def test05TheStftRemovesTheGapCorrectly(self): 81 | fake_batch_of_signal = np.array([np.arange(self.signal_length)]) 82 | produced_signal = self.anStftForTheInpaintingSetting._removeGap(fake_batch_of_signal) 83 | 84 | gap_begins = (self.signal_length-self.gap_length)//2 85 | gap_ends = gap_begins + self.gap_length 86 | 87 | left_side = fake_batch_of_signal[:, :gap_begins] 88 | right_side = fake_batch_of_signal[:, gap_ends:] 89 | signal_without_gap = tf.stack((left_side, right_side), axis=1) 90 | 91 | with tf.Session() as sess: 92 | produced_signal, signal_without_gap = sess.run([produced_signal, signal_without_gap]) 93 | 94 | np.testing.assert_almost_equal(signal_without_gap, produced_signal) 95 | 96 | def test06TheStftAddsTheCorrectPaddingToTheSides(self): 97 | side_length = (self.signal_length-self.gap_length)//2 98 | 99 | left_side = np.array([np.arange(side_length, dtype=np.float32)]) 100 | right_side = np.array([np.arange(side_length, dtype=np.float32)]) 101 | fake_batch_of_sides = tf.stack((left_side, right_side), axis=1) 102 | 103 | produced_signal = self.anStftForTheInpaintingSetting._addPaddingForStftOfContext(fake_batch_of_sides) 104 | 105 | with tf.Session() as sess: 106 | produced_signal = sess.run(produced_signal) 107 | 108 | left_side_padded = np.concatenate((left_side, np.zeros((1, self.fft_window_length-self.fft_hop_size))), axis=1) 109 | right_side_padded = np.concatenate((np.zeros((1, self.fft_window_length-self.fft_hop_size)), right_side), axis=1) 110 | new_signal = np.stack([left_side_padded, right_side_padded], axis=1) 111 | 112 | np.testing.assert_almost_equal(new_signal, produced_signal) 113 | 114 | def test07TheStftOfTheContextHasTheExpectedShape(self): 115 | batch_size = 32 116 | aBatchOfSignals = tf.placeholder(tf.float32, shape=(batch_size, self.signal_length), name='input_data') 117 | aStft = self.anStftForTheInpaintingSetting.stftForTheContextOf(aBatchOfSignals) 118 | 119 | side_length = (self.signal_length-self.gap_length)//2 120 | framesOnSides = ((side_length + self.anStftForTheInpaintingSetting.padding() - self.fft_window_length) 121 | / self.fft_hop_size)+1 122 | binsPerFrame = self.fft_window_length//2+1 123 | realAndImagChannels = 2 124 | beforeAndAfterChannels = 2 125 | 126 | self.assertEquals(aStft.shape.as_list(), [32, framesOnSides, binsPerFrame, 127 | realAndImagChannels*beforeAndAfterChannels]) 128 | 129 | def test08TheStftProducesTheCorrectShapeWhenDoingTheInverseStftOnTheGap(self): 130 | batch_size = 32 131 | framesOnGap = (((self.gap_length + 
self.anStftForTheInpaintingSetting.padding()*2)-self.fft_window_length)/ 132 | self.fft_hop_size)+1 133 | binsPerFrame = self.fft_window_length//2+1 134 | batchOfGapStft = tf.zeros((batch_size, framesOnGap, binsPerFrame), dtype=tf.complex64) 135 | 136 | batchOfGaps = self.anStftForTheInpaintingSetting.inverseStftOfGap(batchOfGapStft) 137 | 138 | with tf.Session() as sess: 139 | batchOfGaps = sess.run(batchOfGaps) 140 | 141 | self.assertEquals(batchOfGaps.shape, (batch_size, self.gap_length)) 142 | 143 | def test09TheStftProducesTheCorrectShapeWhenDoingTheInverseStftOnTheFullSignal(self): 144 | batch_size = 32 145 | frameCount = ((self.signal_length-self.fft_window_length)/self.fft_hop_size)+1 146 | binsPerFrame = self.fft_window_length//2+1 147 | batchOfSignalStft = tf.zeros((batch_size, frameCount, binsPerFrame), dtype=tf.complex64) 148 | 149 | batchOfSignals = self.anStftForTheInpaintingSetting.inverseStftOfSignal(batchOfSignalStft) 150 | 151 | with tf.Session() as sess: 152 | batchOfGaps = sess.run(batchOfSignals) 153 | 154 | self.assertEquals(batchOfGaps.shape, (batch_size, self.signal_length)) 155 | -------------------------------------------------------------------------------- /utils/legacy/stftRealImagContextEncoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from utils.legacy.contextEncoder import ContextEncoderNetwork 7 | from utils.legacy.evaluationWriter import EvaluationWriter 8 | from utils.legacy.plotSummary import PlotSummary 9 | from utils.strechableNumpyArray import StrechableNumpyArray 10 | from utils.tfReader import TFReader 11 | 12 | __author__ = 'Andres' 13 | 14 | 15 | class StftRealImagContextEncoder(ContextEncoderNetwork): 16 | def __init__(self, model, batch_size, stft, window_size, gap_length, learning_rate, name): 17 | self._stft = stft 18 | super(StftRealImagContextEncoder, self).__init__(model, batch_size, window_size, gap_length, learning_rate, 19 | name) 20 | self._sides = tf.placeholder(tf.float32, shape=(batch_size, self._window_size - self._gap_length), name='sides') 21 | self._reconstructedSignal = self._reconstructSignal(self._sides, self.gap_data) 22 | 23 | self._SNR = tf.reduce_mean(self._pavlovs_SNR(self._stft[:, 15:15 + 7, :, :], self._reconstructed_input_data, 24 | onAxis=[1, 2, 3])) 25 | 26 | def _reconstructSignal(self, sides, gaps): 27 | signal_length = self._window_size - self._gap_length 28 | first_half = sides[:, :signal_length // 2] 29 | second_half = sides[:, signal_length // 2:] 30 | 31 | reconstructed_signal = tf.concat([first_half, gaps, second_half], axis=1) 32 | return reconstructed_signal 33 | 34 | def _loss_graph(self): 35 | with tf.variable_scope("Loss"): 36 | gap_stft = self._stft[:, 15:15 + 7, :, :] 37 | 38 | norm_orig = self._squaredEuclideanNorm(gap_stft, onAxis=[1, 2, 3]) 39 | norm_orig_summary = tf.summary.scalar("norm_orig", tf.reduce_min(norm_orig)) 40 | 41 | error = gap_stft - self._reconstructed_input_data 42 | # Nati comment: here you should use only one reduce sum function 43 | error_per_example = tf.reduce_sum(tf.square(error), axis=[1, 2, 3]) 44 | 45 | reconstruction_loss = 0.5 * tf.reduce_sum(error_per_example * (1 + 5 / (norm_orig+1e-2))) 46 | 47 | rec_loss_summary = tf.summary.scalar("reconstruction_loss", reconstruction_loss) 48 | 49 | trainable_vars = tf.trainable_variables() 50 | lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars if 'bias' not in v.name]) * 1e-2 51 | l2_loss_summary = 
tf.summary.scalar("lossL2", lossL2) 52 | 53 | total_loss = tf.add_n([reconstruction_loss, lossL2]) 54 | total_loss_summary = tf.summary.scalar("total_loss", total_loss) 55 | 56 | self._lossSummaries = tf.summary.merge([rec_loss_summary, l2_loss_summary, norm_orig_summary, total_loss_summary]) 57 | 58 | return total_loss 59 | 60 | def reconstructAudio(self, audios, model_num=None, max_batchs=200): 61 | with tf.Session() as sess: 62 | if model_num is not None: 63 | path = self.modelsPath(model_num) 64 | else: 65 | path = self.modelsPath(self._initial_model_num) 66 | saver = tf.train.Saver() 67 | saver.restore(sess, path) 68 | print("Model restored.") 69 | 70 | batches_count = int(len(audios) / self._batch_size) 71 | 72 | reconstructed = StrechableNumpyArray() 73 | original_stfts = StrechableNumpyArray() 74 | for batch_num in range(min(batches_count, max_batchs)): 75 | batch_data = audios[batch_num * self._batch_size:batch_num * self._batch_size + self._batch_size] 76 | feed_dict = {self._model.input(): batch_data, self._model.isTraining(): False} 77 | reconstructed_input, original_stft = sess.run([self._reconstructed_input_data, self._stft], 78 | feed_dict=feed_dict) 79 | original_stfts.append(np.reshape(original_stft, (-1))) 80 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 81 | reconstructed = reconstructed.finalize() 82 | reconstructed_stft = np.reshape(reconstructed, (-1, 37, 257)) 83 | original_stfts = original_stfts.finalize() 84 | original_stft = np.reshape(original_stfts, (-1, 7, 257, 2)) 85 | 86 | return reconstructed_stft, original_stft 87 | 88 | def _reconstruct(self, sess, data_reader, max_steps): 89 | data_reader.start() 90 | reconstructed = StrechableNumpyArray() 91 | out_gaps = StrechableNumpyArray() 92 | for batch_num in range(max_steps): 93 | try: 94 | sides, gaps = data_reader.dataOperation(session=sess) 95 | except StopIteration: 96 | print(batch_num) 97 | print("rec End of queue!") 98 | break 99 | reconstructed_signal = sess.run(self._reconstructedSignal, 100 | feed_dict={self._sides: sides, self.gap_data: gaps}) 101 | gap_stft = self._stft[:, 15:15 + 7, :] 102 | 103 | feed_dict = {self._model.input(): reconstructed_signal, self._model.isTraining(): False} 104 | reconstructed_input, original = sess.run([self._reconstructed_input_data, gap_stft], feed_dict=feed_dict) 105 | out_gaps.append(np.reshape(original, (-1))) 106 | reconstructed.append(np.reshape(reconstructed_input, (-1))) 107 | 108 | reconstructed = reconstructed.finalize() 109 | reconstructed = np.reshape(reconstructed, (-1, 7, 257, 2)) 110 | out_gaps = out_gaps.finalize() 111 | out_gaps = np.reshape(out_gaps, (-1, 7, 257, 2)) 112 | 113 | data_reader.finish() 114 | 115 | return reconstructed, out_gaps 116 | 117 | def train(self, train_data_path, valid_data_path, num_steps=2e2, restore_num=None, 118 | per_process_gpu_memory_fraction=1): 119 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=per_process_gpu_memory_fraction) 120 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 121 | try: 122 | trainReader = TFReader(train_data_path, self._window_size, self._gap_length, capacity=int(2e5), 123 | num_epochs=400) 124 | validReader = TFReader(valid_data_path, self._window_size, self._gap_length, capacity=int(2e5), 125 | num_epochs=40000) 126 | 127 | saver = tf.train.Saver(max_to_keep=1000) 128 | if restore_num == 0: 129 | init = tf.global_variables_initializer() 130 | sess.run([init, tf.local_variables_initializer()]) 131 | print("Initialized") 132 | else: 133 | path = 
self.modelsPath(restore_num) 134 | self._initial_model_num = get_trailing_number(path[:-5]) 135 | print(self._initial_model_num) 136 | saver.restore(sess, path) 137 | sess.run([tf.local_variables_initializer()]) 138 | print("Model restored.") 139 | 140 | logs_path = '../logdir_real_cae/' + self._name # write each run to a diff folder. 141 | print("logs path:", logs_path) 142 | writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph()) 143 | 144 | train_SNR_summary = tf.summary.scalar("training_SNR", self._SNR) 145 | valid_SNR = tf.placeholder(tf.float32, name="valid_SNR") 146 | valid_SNR_summary = tf.summary.scalar("validation_SNR", valid_SNR) 147 | plot_summary = PlotSummary('reconstruction') 148 | 149 | trainReader.start() 150 | evalWriter = EvaluationWriter(self._name + '.xlsx') 151 | 152 | # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 153 | # run_metadata = tf.RunMetadata() 154 | # many_runs_timeline = TimeLiner() 155 | 156 | for step in range(1, int(num_steps)): 157 | try: 158 | sides, gaps = trainReader.dataOperation(session=sess) 159 | except StopIteration: 160 | print(step) 161 | print("End of queue!") 162 | break 163 | 164 | rec = sess.run(self._reconstructedSignal, feed_dict={self._sides: sides, self.gap_data: gaps}) 165 | 166 | feed_dict = {self._model.input(): rec, self.gap_data: gaps, self._model.isTraining(): True} 167 | sess.run(self._optimizer, feed_dict=feed_dict) # , options=options, run_metadata=run_metadata) 168 | 169 | # fetched_timeline = timeline.Timeline(run_metadata.step_stats) 170 | # chrome_trace = fetched_timeline.generate_chrome_trace_format() 171 | # many_runs_timeline.update_timeline(chrome_trace) 172 | 173 | if step % 40 == 0: 174 | train_summ = sess.run(self._lossSummaries, feed_dict=feed_dict) 175 | writer.add_summary(train_summ, self._initial_model_num + step) 176 | if step % 2000 == 0: 177 | print(step) 178 | #reconstructed, out_gaps = self._reconstruct(sess, trainReader, max_steps=8) # WRONG 179 | # plot_summary.plotSideBySide(out_gaps, reconstructed) 180 | trainSNRSummaryToWrite = sess.run(train_SNR_summary, feed_dict=feed_dict) 181 | writer.add_summary(trainSNRSummaryToWrite, self._initial_model_num + step) 182 | #summaryToWrite = plot_summary.produceSummaryToWrite(sess) 183 | #writer.add_summary(summaryToWrite, self._initial_model_num + step) 184 | saver.save(sess, self.modelsPath(self._initial_model_num + step)) 185 | reconstructed, out_gaps = self._reconstruct(sess, validReader, max_steps=8) 186 | step_valid_SNR = evalWriter.evaluateImages(reconstructed, out_gaps, self._initial_model_num + step) 187 | validSNRSummaryToWrite = sess.run(valid_SNR_summary, feed_dict={valid_SNR: step_valid_SNR}) 188 | writer.add_summary(validSNRSummaryToWrite, self._initial_model_num + step) 189 | 190 | except KeyboardInterrupt: 191 | pass 192 | # many_runs_timeline.save('timeline_03_merged_%d_runs.json' % step) 193 | evalWriter.save() 194 | train_summ = sess.run(self._lossSummaries, feed_dict=feed_dict) 195 | writer.add_summary(train_summ, self._initial_model_num + step) 196 | saver.save(sess, self.modelsPath(self._initial_model_num + step)) 197 | self._initial_model_num += step 198 | 199 | trainReader.finish() 200 | print("Finalizing at step:", self._initial_model_num) 201 | print("Last saved model:", self.modelsPath(self._initial_model_num)) 202 | 203 | 204 | def get_trailing_number(s): 205 | m = re.search(r'\d+$', s) 206 | return int(m.group()) if m else None 207 | 
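# Example: get_trailing_number('saved_models/model-12500') returns 12500 (the path is
# illustrative); it is used above to recover the step count from a restored checkpoint path.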
--------------------------------------------------------------------------------