├── .gitattributes
├── CodeTest
│   └── Test.py
├── 02.GRUTraining
│   ├── CreateTestDataset.py
│   ├── ModelTest.py
│   ├── GRUModelTest.py
│   ├── LSTMTestTraining.py
│   └── GRUTraining.py
├── .gitignore
├── README.md
└── 01.CreateNoiseAddedDataset
    └── CreateNoiseAddDataset.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | 
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 | 
--------------------------------------------------------------------------------
/CodeTest/Test.py:
--------------------------------------------------------------------------------
1 | # Scratch test: load one clean voice clip and its three noise-added variants,
2 | # and check that SciPy's STFT runs on each of them.
3 | import os
4 | import scipy.io.wavfile as wav
5 | import scipy.signal as signal
6 | 
7 | humanVoice = os.getcwd() + "/abjones_1_01_voice.wav"
8 | whiteNoise = os.getcwd() + "/abjones_1_01_wnoise.wav"
9 | brownNoise = os.getcwd() + "/abjones_1_01_bnoise.wav"
10 | pinkNoise = os.getcwd() + "/abjones_1_01_pnoise.wav"
11 | 
12 | rate0, data0 = wav.read(humanVoice)
13 | rate1, data1 = wav.read(whiteNoise)
14 | rate2, data2 = wav.read(brownNoise)
15 | rate3, data3 = wav.read(pinkNoise)
16 | 
17 | _, t0, _ = signal.stft(data0, fs = 16000, nperseg = 1024, return_onesided = True)
18 | _, t1, _ = signal.stft(data1, fs = 16000, nperseg = 1024, return_onesided = True)
19 | _, t2, _ = signal.stft(data2, fs = 16000, nperseg = 1024, return_onesided = True)
20 | _, t3, _ = signal.stft(data3, fs = 16000, nperseg = 1024, return_onesided = True)
21 | 
22 | print("END")
--------------------------------------------------------------------------------
/02.GRUTraining/CreateTestDataset.py:
--------------------------------------------------------------------------------
1 | # Code By adityatb at https://github.com/adityatb/noise-reduction-using-rnn
2 | # Create the test dataset by sampling n files and moving them to the test directory.
3 | # Maintained by ShYy, 2018. Both ./Training/NoiseAdded/ and ./Testing/NoiseAdded/ must already exist.
4 | 
5 | import os
6 | import random
7 | import shutil
8 | 
9 | move_no_files = 1000  # number of noise-added files to move into the test set
10 | 
11 | work_path = os.getcwd()+"/Training/NoiseAdded/"
12 | test_files_path = os.getcwd()+"/Testing/NoiseAdded/"
13 | 
14 | src_files = os.listdir(work_path)
15 | 
16 | def valid_path(dir_path, filename):
17 |     full_path = os.path.join(dir_path, filename)
18 |     return os.path.isfile(full_path)
19 | 
20 | files = [os.path.join(work_path, f) for f in src_files if valid_path(work_path, f)]
21 | choices = random.sample(files, move_no_files)
22 | for chosen in choices:
23 |     shutil.move(chosen, test_files_path)
24 |     print("Moved: " + str(chosen))
25 | 
26 | print("\nFinished!")
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask instance folder
57 | instance/
58 | 
59 | # Scrapy stuff:
60 | .scrapy
61 | 
62 | # Sphinx documentation
63 | docs/_build/
64 | 
65 | # PyBuilder
66 | target/
67 | 
68 | # IPython Notebook
69 | .ipynb_checkpoints
70 | 
71 | # pyenv
72 | .python-version
73 | 
74 | # celery beat schedule file
75 | celerybeat-schedule
76 | 
77 | # dotenv
78 | .env
79 | 
80 | # virtualenv
81 | venv/
82 | ENV/
83 | 
84 | # Spyder project settings
85 | .spyderproject
86 | 
87 | # Rope project settings
88 | .ropeproject
89 | 
90 | # =========================
91 | # Operating System Files
92 | # =========================
93 | 
94 | # OSX
95 | # =========================
96 | 
97 | .DS_Store
98 | .AppleDouble
99 | .LSOverride
100 | 
101 | # Thumbnails
102 | ._*
103 | 
104 | # Files that might appear in the root of a volume
105 | .DocumentRevisions-V100
106 | .fseventsd
107 | .Spotlight-V100
108 | .TemporaryItems
109 | .Trashes
110 | .VolumeIcon.icns
111 | 
112 | # Directories potentially created on remote AFP share
113 | .AppleDB
114 | .AppleDesktop
115 | Network Trash Folder
116 | Temporary Items
117 | .apdisk
118 | 
119 | # Windows
120 | # =========================
121 | 
122 | # Windows image file caches
123 | Thumbs.db
124 | ehthumbs.db
125 | 
126 | # Folder config file
127 | Desktop.ini
128 | 
129 | # Recycle Bin used on file shares
130 | $RECYCLE.BIN/
131 | 
132 | # Windows Installer files
133 | *.cab
134 | *.msi
135 | *.msm
136 | *.msp
137 | 
138 | # Windows shortcuts
139 | *.lnk
140 | 
141 | # Database files
142 | # =========================
143 | 
144 | # Audio files
145 | *.wav
146 | 
147 | # .idea
148 | # =========================
149 | 
150 | .idea/
151 | 
152 | # Training Checkpoint
153 | # =========================
154 | 
155 | TF_Checkpoints/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NoiseReductionUsingGRU
2 | Graduation Project Title: Noise Reduction Using GRU (RNN).
3 | 
4 | This is my graduation project at BIT, 2018.
5 | 
6 | The project is based on Python 3.5 and TensorFlow 1.8.
7 | It builds on [Noise Reduction using RNNs with Tensorflow](https://github.com/adityatb/noise-reduction-using-rnn) by adityatb.
8 | Thanks for [adityatb's work](https://github.com/adityatb/noise-reduction-using-rnn)! adityatb used LSTM to build a noise reduction model.
9 | 
10 | With the development of RNNs, I think GRU may be a better method.
11 | This graduation project uses the [MIR-1K](https://sites.google.com/site/unvoicedsoundseparation/mir-1k) dataset and tries to use GRU to build a noise reduction model.
12 | 
13 | ## Introduction
14 | This project includes 2 main parts.
15 | 1. CreateNoiseAddedDataset
16 |    This script adds 3 kinds of noise (Brownian, Pink and White) to the clean human voice.
17 |    If you have 1000 human voice files (MIR-1K has 1000 usable files), you will get 3000 noise-added files.
18 | 
19 | 2. GRUTraining
20 |    This is a big part including 5 scripts.
21 |    1. CreateTestDataset.py
22 |       In the last step, we got 3000 files.
23 |       This script randomly separates 1000 of them to be used in the test process.
24 | 
25 |    2. LSTMTestTraining.py
26 |       This script and the next one aim to check the LSTM model's performance,
27 |       so we can see whether the GRU model works better or not.
28 | 
29 |       adityatb used an LSTM model, but the original code had a lot of problems:
30 |       > Python 2.x -> Python 3.x
31 |       > Array overflow problems
32 |       > Wrong usage of the Fourier transform
33 |       > No automatic learning rate adjustment
34 |       > ...
35 |       I solved a lot of them. It is hard to introduce them all here;
36 |       please refer to the git commit history.
37 | 
38 |    3. ModelTest.py
39 |       This script uses the LSTM model trained by the last script and processes the 1000 test sounds.
40 |       Listen to and compare the spectra of the clear voice, the noise-added sound and the output to see the performance.
41 | 
42 |    4. GRUTraining.py
43 |       This script is modified from `LSTMTestTraining.py`.
44 |       Changing LSTM to GRU in the TensorFlow framework is very easy (see the sketch at the end of this README).
45 | 
46 |    5. GRUModelTest.py
47 |       This script is modified from `ModelTest.py` in the same way.
48 | 
49 | ## Execute Steps
50 | **NOTICE: For the details of the implementation, please read the code and refer to the modification history.**
51 | 1. CreateNoiseAddedDataset
52 |    Download the [MIR-1K](https://sites.google.com/site/unvoicedsoundseparation/mir-1k) dataset and decompress the folder `Wavfile`. We will get 1000 wav files.
53 |    Put all the wav files into the `Wavs` folder.
54 |    Run `CreateNoiseAddDataset.py`.
55 |    We will get 1000 human voice files in `./Training/HumanVoices`
56 |    and 3000 noise-added sounds in `./Training/NoiseAdded`.
57 | 
58 | 2. GRUTraining
59 |    1. CreateTestDataset
60 |       Move the `./Training` folder into this part's folder.
61 |       Run `CreateTestDataset.py`.
62 |       We will get 1000 randomly chosen noise-added files in `./Testing/NoiseAdded`,
63 |       and 2000 files will be left in `./Training/NoiseAdded`.
64 | 
65 |    2. GRUTraining
66 |       Run `GRUTraining.py`.
67 |       This script trains a GRU model from the remaining 2000 files and the corresponding pure human voice files.
68 |       Finally we will get TensorFlow checkpoint files in the `./TF_Checkpoints` folder.
69 | 
70 |    3. GRUModelTest
71 |       Run `GRUModelTest.py`.
72 |       We will get less than 1000 files (because some noise-added files correspond to the same pure human voice file) in the `./Testing/ModelOutput` folder.
73 |       You can test the model or do anything else you like.
74 | 
75 | ***FINISH!***
76 | 
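77 | ## Appendix: The LSTM -> GRU Swap
78 | As noted in the introduction, switching the cell type is essentially a one-line change in the
79 | TF 1.x `contrib` API. A minimal sketch (the cell size follows the `stft_size = 1024` used by
80 | the training scripts; everything else, such as `dynamic_rnn`, the MSE loss and the optimizer, stays the same):
81 | ```python
82 | import tensorflow as tf
83 | 
84 | stft_size = 1024
85 | # LSTM cell, as built in LSTMTestTraining.py:
86 | lstm_cell = tf.contrib.rnn.BasicLSTMCell(stft_size, forget_bias=1.0, state_is_tuple=True)
87 | # GRU replacement, as built in GRUTraining.py:
88 | gru_cell = tf.contrib.rnn.GRUCell(stft_size)
89 | ```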
--------------------------------------------------------------------------------
/01.CreateNoiseAddedDataset/CreateNoiseAddDataset.py:
--------------------------------------------------------------------------------
1 | # Code by ShYy
2 | 
3 | # This script uses the MIR-1K dataset to create a noise-added dataset, MIR-1K-NA.
4 | 
5 | import os, random
6 | import numpy as np
7 | import scipy.io.wavfile as wav
8 | 
9 | 
10 | # Mix audio and noise as 75% audio + 25% noise
11 | def mix_audio(data, noise):
12 |     mix = np.add(0.75*data, 0.25*noise)
13 |     out = np.array(mix)
14 | 
15 |     return out
16 | 
17 | 
18 | # Pick a random start point within the first 15s of the noise audio. The human voice waves are all shorter than 15s, so the piece never runs past the end of the 30s noise.
19 | def create_noise_piece(noisedata, data):
20 |     datalength = len(data)
21 |     startpoint = random.randrange(0, 240000)  # 30s of noise has 480000 sampled points at 16kHz; 240000 is the first 15s
22 | 
23 |     outputdata = noisedata[startpoint:startpoint + datalength]
24 | 
25 |     return outputdata
26 | 
27 | 
28 | # ----- Main Function Start -----
29 | 
30 | # Directories
31 | wavsDir = os.getcwd() + "/Wavs/"
32 | noiseDir = os.getcwd() + "/Noises/"
33 | noiseAddedDir = os.getcwd() + "/Training/NoiseAdded/"
34 | humanVoiceDir = os.getcwd() + "/Training/HumanVoices/"
35 | 
36 | # Noise file names
37 | whiteNoise = "WhiteNoise.wav"
38 | brownianNoise = "BrownianNoise.wav"
39 | pinkNoise = "PinkNoise.wav"
40 | 
41 | # The length of the noise files in samples. All three noise files are 30s long.
42 | os.chdir(noiseDir)
43 | _, tempNoiseData = wav.read(whiteNoise)  # wav.read returns (rate, data); keep only the data
44 | noiseLength = len(tempNoiseData)
45 | 
46 | # Get noises
47 | wNoiseRate, wNoise = wav.read(whiteNoise)
48 | bNoiseRate, bNoise = wav.read(brownianNoise)
49 | pNoiseRate, pNoise = wav.read(pinkNoise)
50 | 
51 | # Define mixture arrays
52 | WhiteNoiseMix = []
53 | BrownianNoiseMix = []
54 | PinkNoiseMix = []
55 | 
56 | # File counter for debugging
57 | fileCounter = 0
58 | 
59 | # Enter the /Wavs/ dir to start the mixing process
60 | os.chdir(wavsDir)
61 | 
62 | # Each wave file has two channels: left is music, right is human voice.
63 | # For each wave file, take the right channel and mix it with the three types of noise.
64 | for fileName in os.listdir(wavsDir):
65 |     if fileName.endswith(".wav"):
66 |         # Read the wave file
67 |         wavFileRate, wavFile = wav.read(fileName)
68 | 
69 |         # Use the right channel
70 |         rightChannel = wavFile[:, 1]
71 |         humanVoice = np.array(rightChannel)
72 | 
73 |         # Normalize the human voice channel to the full int16 range.
74 |         humanVoicePeak = max(abs(humanVoice))
75 |         wavFilePeak = np.iinfo(wavFile.dtype).max
76 |         gain = float(wavFilePeak)/humanVoicePeak
77 |         humanVoiceNormalized = np.array(humanVoice * gain)
78 | 
79 |         # Create mixtures
80 |         print("Mixing " + fileName + " with Noises...")
81 |         WhiteNoiseMix = np.array(mix_audio(humanVoiceNormalized, create_noise_piece(wNoise, humanVoiceNormalized)))
82 |         BrownianNoiseMix = np.array(mix_audio(humanVoiceNormalized, create_noise_piece(bNoise, humanVoiceNormalized)))
83 |         PinkNoiseMix = np.array(mix_audio(humanVoiceNormalized, create_noise_piece(pNoise, humanVoiceNormalized)))
84 | 
85 |         # Write the mixture audio files into the training directory.
86 |         os.chdir(noiseAddedDir)
87 |         fName, extendName = os.path.splitext(fileName)
88 |         wav.write(fName + "_wnoise" + extendName, wavFileRate, WhiteNoiseMix.astype(np.int16))
89 |         wav.write(fName + "_bnoise" + extendName, wavFileRate, BrownianNoiseMix.astype(np.int16))
90 |         wav.write(fName + "_pnoise" + extendName, wavFileRate, PinkNoiseMix.astype(np.int16))
91 | 
92 |         # Write the normalized human voice to its own directory, paired with the noise-added audios
93 |         os.chdir(humanVoiceDir)
94 |         wav.write(fName + "_voice" + extendName, wavFileRate, humanVoiceNormalized.astype(np.int16))
95 | 
96 |         # Done; go back to the Wavs directory
97 |         print("Finished Processing: " + fileName)
98 |         os.chdir(wavsDir)
99 | 
100 |         # Count processed files
101 |         fileCounter = fileCounter + 1
102 | 
103 | print("Total Processed: " + str(fileCounter) + " file(s).")
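104 | 
105 | # Optional sanity check (a minimal sketch; "abjones_1_01" is just one known MIR-1K clip,
106 | # so adjust the name to any file you actually generated). It verifies that a mixed file
107 | # and its clean counterpart share the same sample rate and length.
108 | voiceCheck = humanVoiceDir + "abjones_1_01_voice.wav"
109 | mixCheck = noiseAddedDir + "abjones_1_01_wnoise.wav"
110 | if os.path.exists(voiceCheck) and os.path.exists(mixCheck):
111 |     vRate, vData = wav.read(voiceCheck)
112 |     mRate, mData = wav.read(mixCheck)
113 |     assert vRate == mRate and len(vData) == len(mData), "voice/mixture mismatch"
114 |     print("Sanity check passed: sample rates and lengths match.")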
--------------------------------------------------------------------------------
/02.GRUTraining/ModelTest.py:
--------------------------------------------------------------------------------
1 | # This script tests the model using the test dataset separated from the source dataset by CreateTestDataset.py.
2 | # Code by ShYy, 2018.
3 | 
4 | import scipy
5 | import scipy.signal as signal
6 | import numpy as np
7 | import os
8 | import random
9 | import sys
10 | import scipy.io.wavfile as wav
11 | import tensorflow as tf
12 | import math
13 | 
14 | # Get the source human voice file name from a noise-added file name (strip "_xnoise.wav", append "_voice.wav").
15 | def formatSrcFilename(filename):
16 |     return filename[:len(filename) - 11] + "_voice.wav"
17 | 
18 | def formatOutputFilename(filename):
19 |     return filename[:len(filename) - 11] + "_output.wav"
20 | 
21 | def sequentialized_spectrum(batch):
22 |     # Get maximum length of batch
23 |     t = []
24 |     t_vec = []
25 |     Sxx_Vec = []
26 |     for each in batch:
27 |         _, t, Sxx_Vec_Temp = signal.stft(each, fs=testNARateRepository[0], nperseg=stft_size, return_onesided = False)
28 |         t_vec.append(t)
29 |         Sxx_Vec.append(Sxx_Vec_Temp)
30 |     maximum_length = findMaxlen(t_vec)
31 | 
32 |     max_run_total = int(math.ceil(float(maximum_length) / sequence_length))
33 |     final_data = np.zeros([len(batch), max_run_total, stft_size, sequence_length])
34 |     true_time = np.zeros([len(batch), max_run_total])
35 | 
36 |     # Read in a file and compute its spectrum
37 |     # for batch_idx, each_set in enumerate(batch):
38 |     for batch_idx, Sxx in enumerate(Sxx_Vec):
39 |         # f, t, Sxx = signal.stft(each_set, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
40 | 
41 |         # Magnitude (real part) spectra
42 |         Mag = Sxx.real
43 |         t = t_vec[batch_idx]
44 |         # Phase = Sxx.imag
45 | 
46 |         # Break up the spectrum into sequence_length sized chunks
47 |         run_full_steps = float(len(t)) / sequence_length
48 |         run_total = int(math.ceil(run_full_steps))
49 | 
50 |         # Run a loop long enough to break up all the data in the file into chunks of sequence_length
51 |         for step in range(run_total):
52 | 
53 |             begin_point = step * sequence_length
54 |             end_point = begin_point + sequence_length
55 | 
56 |             m, n = Mag[:, begin_point:end_point].shape
57 | 
58 |             # Store each chunk sequentially in a new array, zero padding the last chunk of the file
59 |             if n == sequence_length:
60 |                 final_data[batch_idx, step, :, :] = np.copy(Mag[:, begin_point:end_point])
61 |                 true_time[batch_idx, step] = n
62 |             else:
63 |                 final_data[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag[:, begin_point:end_point], sequence_length))
64 |                 true_time[batch_idx, step] = n
65 | 
66 |     final_data = np.transpose(final_data, (0, 1, 3, 2))
67 | 
68 |     return final_data, true_time, maximum_length
69 | 
70 | def findMaxlen(data_vec):
71 |     max_ = 0
72 |     for each in data_vec:
73 |         if len(each) > max_:
74 |             max_ = len(each)
75 |     return max_
76 | 
77 | def create_final_sequence(sequence, max_length):
78 |     a, b = sequence.shape
79 |     extra_len = max_length - b
80 |     null_mat = np.zeros((len(sequence), extra_len), dtype=np.float32)
81 |     sequence = np.concatenate((sequence, null_mat), axis=1)
82 |     return sequence
83 | 
84 | # Directories
85 | humanVoice = os.getcwd() + "/Training/HumanVoices/"
86 | testData = os.getcwd() + "/Testing/NoiseAdded/"
87 | modelOutput = os.getcwd() + "/Testing/ModelOutput/"
88 | graphPath = os.getcwd() + "/TF_Checkpoints/FINAL.ckpt"
89 | 
90 | # Number of test files
91 | testFileNum = 0
92 | 
93 | # File Lists
94 | testNAFileList = [] # Test dataset: noise-added file list.
95 | srcHVFileList = [] # Source human voice file list.
96 | outputFileList = [] # Output file list.
97 | 
98 | # File Repositories
99 | testNARateRepository = []
100 | testNADataRepository = []
101 | srcHVRateRepository = []
102 | srcHVDataRepository = []
103 | 
104 | norm_factor = (1.0 / 32768.0) # Map int16 samples into the -1 ~ 1 range for the LSTM
105 | 
106 | # Walk all test NA files into the file list and repository.
107 | for root, _, files in os.walk(testData):
108 |     files = sorted(files)
109 |     testFileNum = len(files)
110 | 
111 |     for f in files:
112 |         if f.endswith(".wav"):
113 |             testNAFileList.append(f)
114 |             rate, data = wav.read(os.path.join(root, f))
115 |             testNARateRepository.append(rate)
116 |             testNADataRepository.append(data * norm_factor)
117 | 
118 | srcHVFileList = list(map(formatSrcFilename, testNAFileList))
119 | outputFileList = list(map(formatOutputFilename, testNAFileList))
120 | 
121 | # Walk all source HV files into the repository.
122 | for root, _, files in os.walk(humanVoice):
123 |     files = sorted(files)
124 | 
125 |     for f in files:
126 |         if(f.endswith(".wav")):
127 |             for name in srcHVFileList:
128 |                 if f == name:
129 |                     rate, data = wav.read(os.path.join(root, f))
130 |                     srcHVRateRepository.append(rate)
131 |                     srcHVDataRepository.append(data * norm_factor)
132 | 
133 | # STFT process variables, also used by the LSTM
134 | sequence_length = 100
135 | stft_size = 1024
136 | batch_size = 1 # Process one wav file at a time.
137 | 
138 | # Tensorflow vars + Graph and LSTM Params
139 | input_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
140 | # clean_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
141 | sequence_length_tensor = tf.placeholder(tf.int32, [None])
142 | 
143 | # TF Graph Definition
144 | lstm_cell = tf.contrib.rnn.BasicLSTMCell(stft_size, forget_bias = 1.0, state_is_tuple = True)
145 | # stacked_lstm = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(stft_size) for i in range(number_of_layers)])
146 | init_state = lstm_cell.zero_state(batch_size, tf.float32)
147 | rnn_outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, input_data, sequence_length=sequence_length_tensor, initial_state=init_state, time_major=False)
148 | # mse_loss = tf.losses.mean_squared_error(rnn_outputs, clean_data)
149 | # train_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse_loss)
150 | # train_optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(mse_loss)
151 | # train_optimizer = tf.train.AdagradDAOptimizer(learning_rate).minimize(mse_loss)
152 | # train_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse_loss)
153 | saver = tf.train.Saver()
154 | 
155 | # Initialize the TF graph and restore the trained weights
156 | init_op = tf.global_variables_initializer()
157 | gpu_options = tf.GPUOptions(allow_growth = True) # Let the session grow GPU memory as needed.
158 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
159 | sess.run(init_op)
160 | saver.restore(sess, graphPath)
161 | print("\t***** TF GRAPH RESTORED *****")
162 | 
163 | # Start Processing
164 | for idx in range(testFileNum):
165 |     nowNAFile = []
166 |     nowNAFile.append(testNADataRepository[idx])
167 | 
168 |     # Compute the STFT chunks of the noise-added file.
169 |     nowNAData_STFT, sequenceLengthID, maxLength = sequentialized_spectrum(nowNAFile)
170 | 
171 |     # Get the number of time steps.
172 |     maxTimeSteps = len(nowNAData_STFT[0])
173 | 
174 |     # Allocate outputData to collect rnn_outputs_value.
175 |     outputData = np.zeros([1, maxTimeSteps, stft_size, sequence_length]) # Transposed back to [0, 1, 3, 2]
176 | 
177 |     for timeStep in range(maxTimeSteps):
178 |         feed_dict = {
179 |             input_data : nowNAData_STFT[:, timeStep, :],
180 |             sequence_length_tensor : sequenceLengthID[:, timeStep]
181 |         }
182 |         final_state_value, rnn_outputs_value = sess.run([final_state, rnn_outputs], feed_dict=feed_dict)
183 | 
184 |         rnn_outputs_value = np.transpose(rnn_outputs_value, [0, 2, 1])
185 |         outputData[0][timeStep] = rnn_outputs_value
186 | 
187 |     # Assemble outputData_STFT by concatenating the chunks along the time axis.
188 |     outputData_STFT = np.zeros([stft_size, maxLength])
189 |     beginTime = 0
190 |     endTime = 0
191 |     for timeStep in range(maxTimeSteps):
192 |         if(timeStep < maxTimeSteps - 1):
193 |             endTime = beginTime + sequence_length
194 |             outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, :]
195 |         else:
196 |             endTime = beginTime + int(sequenceLengthID[0, timeStep])
197 |             outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, 0 : (endTime - beginTime)]
198 | 
199 |         beginTime = beginTime + sequence_length
200 | 
201 |     # Compute the ISTFT
202 |     _, outputData_ISTFT = signal.istft(outputData_STFT, fs=testNARateRepository[0], nperseg=stft_size, input_onesided = False)
203 | 
204 |     outputData_ISTFT = (outputData_ISTFT / norm_factor).real
205 |     outputData_ISTFT = outputData_ISTFT.astype(np.int16)
206 | 
207 |     wav.write(modelOutput + outputFileList[idx], testNARateRepository[idx], outputData_ISTFT)
208 |     print("Index: " + str(idx))
209 |     print("\tOutput File: " + str(outputFileList[idx]))
--------------------------------------------------------------------------------
/02.GRUTraining/GRUModelTest.py:
--------------------------------------------------------------------------------
1 | # This script tests the model using the test dataset separated from the source dataset by CreateTestDataset.py.
2 | # Code by ShYy, 2018.
3 | 
4 | import scipy
5 | import scipy.signal as signal
6 | import numpy as np
7 | import os
8 | import random
9 | import sys
10 | import scipy.io.wavfile as wav
11 | import tensorflow as tf
12 | import math
13 | 
14 | # Get the source human voice file name from a noise-added file name (strip "_xnoise.wav", append "_voice.wav").
15 | def formatSrcFilename(filename):
16 |     return filename[:len(filename) - 11] + "_voice.wav"
17 | 
18 | def formatOutputFilename(filename):
19 |     return filename[:len(filename) - 11] + "_output.wav"
20 | 
21 | def sequentialized_spectrum(batch):
22 |     # Get maximum length of batch
23 |     t = []
24 |     t_vec = []
25 |     Sxx_Vec = []
26 |     for each in batch:
27 |         _, t, Sxx_Vec_Temp = signal.stft(each, fs=testNARateRepository[0], nperseg=stft_size, return_onesided = False)
28 |         t_vec.append(t)
29 |         Sxx_Vec.append(Sxx_Vec_Temp)
30 |     maximum_length = findMaxlen(t_vec)
31 | 
32 |     max_run_total = int(math.ceil(float(maximum_length) / sequence_length))
33 |     final_data = np.zeros([len(batch), max_run_total, stft_size, sequence_length], dtype=np.float32)
34 |     final_data_imag = np.zeros([len(batch), max_run_total, stft_size, sequence_length], dtype=np.float32)
35 |     true_time = np.zeros([len(batch), max_run_total], dtype=np.int32)
36 | 
37 |     # Read in a file and compute its spectrum
38 |     # for batch_idx, each_set in enumerate(batch):
39 |     for batch_idx, Sxx in enumerate(Sxx_Vec):
40 |         # f, t, Sxx = signal.stft(each_set, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
41 | 
42 |         # Real and imaginary spectra
43 |         Mag = Sxx.real
44 |         Mag_Imag = Sxx.imag
45 |         t = t_vec[batch_idx]
46 |         # Phase = Sxx.imag
47 | 
48 |         # Break up the spectrum into sequence_length sized chunks
49 |         run_full_steps = float(len(t)) / sequence_length
50 |         run_total = int(math.ceil(run_full_steps))
51 | 
52 |         # Run a loop long enough to break up all the data in the file into chunks of sequence_length
53 |         for step in range(run_total):
54 | 
55 |             begin_point = step * sequence_length
56 |             end_point = begin_point + sequence_length
57 | 
58 |             m, n = Mag[:, begin_point:end_point].shape
59 | 
60 |             # Store each chunk sequentially in a new array, zero padding the last chunk of the file
61 |             if n == sequence_length:
62 |                 final_data[batch_idx, step, :, :] = np.copy(Mag[:, begin_point:end_point])
63 |                 final_data_imag[batch_idx, step, :, :] = np.copy(Mag_Imag[:, begin_point:end_point])
64 |                 true_time[batch_idx, step] = n
65 |             else:
66 |                 final_data[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag[:, begin_point:end_point], sequence_length))
67 |                 final_data_imag[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag_Imag[:, begin_point:end_point], sequence_length))
68 |                 true_time[batch_idx, step] = n
69 | 
70 |     final_data = np.transpose(final_data, (0, 1, 3, 2))
71 |     final_data_imag = np.transpose(final_data_imag, (0, 1, 3, 2))
72 | 
73 |     return final_data, final_data_imag, true_time, maximum_length
74 | 
75 | def findMaxlen(data_vec):
76 |     max_ = 0
77 |     for each in data_vec:
78 |         if len(each) > max_:
79 |             max_ = len(each)
80 |     return max_
81 | 
82 | def create_final_sequence(sequence, max_length):
83 |     a, b = sequence.shape
84 |     extra_len = max_length - b
85 |     null_mat = np.zeros((len(sequence), extra_len), dtype=np.float32)
86 |     sequence = np.concatenate((sequence, null_mat), axis=1)
87 |     return sequence
88 | 
89 | # Directories
90 | humanVoice = os.getcwd() + "/Training/HumanVoices/"
91 | testData = os.getcwd() + "/Testing/NoiseAdded/"
92 | modelOutput = os.getcwd() + "/Testing/ModelOutput/"
93 | graphPath_Real = os.getcwd() + "/TF_Checkpoints/FINAL_Real.ckpt"
94 | graphPath_Imag = os.getcwd() + "/TF_Checkpoints/FINAL_Imag.ckpt"
95 | 
96 | # Number of test files
97 | testFileNum = 0
98 | 
99 | # File Lists
100 | testNAFileList = [] # Test dataset: noise-added file list.
101 | srcHVFileList = [] # Source human voice file list.
102 | outputFileList = [] # Output file list.
103 | 
104 | # File Repositories
105 | testNARateRepository = []
106 | testNADataRepository = []
107 | srcHVRateRepository = []
108 | srcHVDataRepository = []
109 | 
110 | norm_factor = (1.0 / 32768.0) # Map int16 samples into the -1 ~ 1 range for the GRU
111 | 
112 | # Walk all test NA files into the file list and repository.
113 | for root, _, files in os.walk(testData):
114 |     files = sorted(files)
115 |     testFileNum = len(files)
116 | 
117 |     for f in files:
118 |         if f.endswith(".wav"):
119 |             testNAFileList.append(f)
120 |             rate, data = wav.read(os.path.join(root, f))
121 |             testNARateRepository.append(rate)
122 |             testNADataRepository.append(data * norm_factor)
123 | 
124 | srcHVFileList = list(map(formatSrcFilename, testNAFileList))
125 | outputFileList = list(map(formatOutputFilename, testNAFileList))
126 | 
127 | # Walk all source HV files into the repository.
128 | for root, _, files in os.walk(humanVoice):
129 |     files = sorted(files)
130 | 
131 |     for f in files:
132 |         if(f.endswith(".wav")):
133 |             for name in srcHVFileList:
134 |                 if f == name:
135 |                     rate, data = wav.read(os.path.join(root, f))
136 |                     srcHVRateRepository.append(rate)
137 |                     srcHVDataRepository.append(data * norm_factor)
138 | 
139 | # STFT process variables, also used by the GRU
140 | sequence_length = 100
141 | stft_size = 1024
142 | batch_size = 1 # Process one wav file at a time.
143 | number_of_layers = 3
144 | 
145 | # Tensorflow vars + Graph and GRU Params
146 | input_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
147 | # clean_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
148 | sequence_length_tensor = tf.placeholder(tf.int32, [None])
149 | 
150 | # TF Graph Definition
151 | gru_cells = [tf.contrib.rnn.GRUCell(stft_size, kernel_initializer = tf.zeros_initializer(dtype = tf.float32)) for _ in range(number_of_layers)] # One cell object per layer; a single shared instance would tie the layers' weights together.
152 | # gru_cells = [tf.contrib.rnn.DropoutWrapper(cell, dtype = tf.float32, output_keep_prob = 0.5) for cell in gru_cells] # Cancel Dropout at test time
153 | stacked_gru = tf.contrib.rnn.MultiRNNCell(gru_cells, state_is_tuple=True)
154 | init_state = stacked_gru.zero_state(batch_size, tf.float32)
155 | rnn_outputs, final_state = tf.nn.dynamic_rnn(stacked_gru, input_data, sequence_length=sequence_length_tensor, initial_state=init_state, time_major=False)
156 | # mse_loss = tf.losses.mean_squared_error(rnn_outputs, clean_data)
157 | # train_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse_loss)
158 | # train_optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(mse_loss)
159 | # train_optimizer = tf.train.AdagradDAOptimizer(learning_rate).minimize(mse_loss)
160 | # train_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse_loss)
161 | saver = tf.train.Saver()
162 | 
163 | # Initialize the TF graph; the trained weights are restored inside the loop below
164 | init_op = tf.global_variables_initializer()
165 | gpu_options = tf.GPUOptions(allow_growth = True) # Let the session grow GPU memory as needed.
166 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
167 | sess.run(init_op)
168 | 
169 | # Start Processing
170 | for idx in range(testFileNum):
171 |     print("Index: " + str(idx + 1))
172 | 
173 |     # Restore the real-part graph (the imag-part restore below overwrites it, so this runs every iteration)
174 |     saver.restore(sess, graphPath_Real)
175 |     print("\t***** TF GRAPH REAL RESTORED *****")
176 | 
177 |     nowNAFile = []
178 |     nowNAFile.append(testNADataRepository[idx])
179 | 
180 |     # Compute the STFT chunks of the noise-added file.
181 |     nowNAData_STFT_Real, nowNAData_STFT_Imag, sequenceLengthID, maxLength = sequentialized_spectrum(nowNAFile)
182 | 
183 |     # Get the number of time steps.
184 |     maxTimeSteps = len(nowNAData_STFT_Real[0])
185 | 
186 |     # Allocate outputData arrays to collect rnn_outputs_value.
187 |     outputData_Real = np.zeros([1, maxTimeSteps, stft_size, sequence_length]) # Transposed back to [0, 1, 3, 2]
188 | 
189 |     for timeStep in range(maxTimeSteps):
190 |         feed_dict = {
191 |             input_data : nowNAData_STFT_Real[:, timeStep, :],
192 |             sequence_length_tensor : sequenceLengthID[:, timeStep]
193 |         }
194 |         final_state_value, rnn_outputs_value = sess.run([final_state, rnn_outputs], feed_dict=feed_dict)
195 | 
196 |         rnn_outputs_value = np.transpose(rnn_outputs_value, [0, 2, 1])
197 |         outputData_Real[0][timeStep] = rnn_outputs_value
198 | 
199 | 
200 |     # Restore the imag-part graph
201 |     saver.restore(sess, graphPath_Imag)
202 |     print("\t***** TF GRAPH IMAG RESTORED *****")
203 | 
204 |     outputData_Imag = np.zeros([1, maxTimeSteps, stft_size, sequence_length]) # Transposed back to [0, 1, 3, 2]
205 | 
206 |     for timeStep in range(maxTimeSteps):
207 |         feed_dict = {
208 |             input_data : nowNAData_STFT_Imag[:, timeStep, :],
209 |             sequence_length_tensor : sequenceLengthID[:, timeStep]
210 |         }
211 |         final_state_value, rnn_outputs_value = sess.run([final_state, rnn_outputs], feed_dict=feed_dict)
212 | 
213 |         rnn_outputs_value = np.transpose(rnn_outputs_value, [0, 2, 1])
214 |         outputData_Imag[0][timeStep] = rnn_outputs_value
215 | 
216 | 
217 |     # outputData = np.zeros([1, maxTimeSteps, stft_size, sequence_length], dtype=np.complex128)
218 |     outputData = outputData_Real + 1j * outputData_Imag # Combine the two model outputs into one complex STFT
219 | 
220 |     # Assemble outputData_STFT by concatenating the chunks along the time axis.
221 |     outputData_STFT = np.zeros([stft_size, maxLength], dtype=np.complex128)
222 |     beginTime = 0
223 |     endTime = 0
224 |     for timeStep in range(maxTimeSteps):
225 |         if(timeStep < maxTimeSteps - 1):
226 |             endTime = beginTime + sequence_length
227 |             outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, :]
228 |         else:
229 |             endTime = beginTime + int(sequenceLengthID[0, timeStep])
230 |             outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, 0 : (endTime - beginTime)]
231 | 
232 |         beginTime = beginTime + sequence_length
233 | 
234 |     # Compute the ISTFT
235 |     _, outputData_ISTFT = signal.istft(outputData_STFT, fs=testNARateRepository[0], nperseg=stft_size, input_onesided = False)
236 | 
237 |     outputData_ISTFT = ((outputData_ISTFT / norm_factor).real) / 0.75 # Undo the 0.75 mixing gain
238 |     outputData_ISTFT = outputData_ISTFT.astype(np.int16)
239 | 
240 |     wav.write(modelOutput + outputFileList[idx], testNARateRepository[idx], outputData_ISTFT)
241 |     print("\tOutput File: " + str(outputFileList[idx]) + "\n")
--------------------------------------------------------------------------------
/02.GRUTraining/LSTMTestTraining.py:
--------------------------------------------------------------------------------
1 | # Code By adityatb at https://github.com/adityatb/noise-reduction-using-rnn
2 | # LSTM method test.
3 | # Maintained by ShYy, 2018.
4 | 
5 | import scipy
6 | import scipy.signal as signal
7 | import numpy as np
8 | import os, random, sys
9 | import scipy.io.wavfile as wav
10 | import tensorflow as tf
11 | import math
12 | 
13 | 
14 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
15 | 
16 | # Strip away the _xnoise.wav part of the filename, and append _voice.wav to obtain the clean voice counterpart
17 | def formatFilename(filename):
18 |     return filename[:len(filename) - 11] + "_voice.wav"
19 | 
20 | 
21 | 
22 | def create_final_sequence(sequence, max_length):
23 |     a, b = sequence.shape
24 |     extra_len = max_length - b
25 |     null_mat = np.zeros((len(sequence), extra_len), dtype=np.float32)
26 |     sequence = np.concatenate((sequence, null_mat), axis=1)
27 |     return sequence
28 | 
29 | 
30 | def sequentialized_spectrum(batch):
31 |     # Get maximum length of batch
32 |     t = []
33 |     t_vec = []
34 |     Sxx_Vec = []
35 |     for each in batch:
36 |         _, t, Sxx_Vec_Temp = signal.stft(each, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
37 |         t_vec.append(t)
38 |         Sxx_Vec.append(Sxx_Vec_Temp)
39 |     maximum_length = findMaxlen(t_vec)
40 | 
41 |     max_run_total = int(math.ceil(float(maximum_length) / sequence_length))
42 |     final_data = np.zeros([len(batch), max_run_total, stft_size, sequence_length])
43 |     true_time = np.zeros([len(batch), max_run_total])
44 | 
45 |     # Read in a file and compute its spectrum
46 |     # for batch_idx, each_set in enumerate(batch):
47 |     for batch_idx, Sxx in enumerate(Sxx_Vec):
48 |         # f, t, Sxx = signal.stft(each_set, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
49 | 
50 |         # Magnitude (real part) spectra
51 |         Mag = Sxx.real
52 |         t = t_vec[batch_idx]
53 |         # Phase = Sxx.imag
54 | 
55 |         # Break up the spectrum into sequence_length sized chunks
56 |         run_full_steps = float(len(t)) / sequence_length
57 |         run_total = int(math.ceil(run_full_steps))
58 | 
59 |         # Run a loop long enough to break up all the data in the file into chunks of sequence_length
60 |         for step in range(run_total):
61 | 
62 |             begin_point = step * sequence_length
63 |             end_point = begin_point + sequence_length
64 | 
65 |             m, n = Mag[:, begin_point:end_point].shape
66 | 
67 |             # Store each chunk sequentially in a new array, zero padding the last chunk of the file
68 |             if n == sequence_length:
69 |                 final_data[batch_idx, step, :, :] = np.copy(Mag[:, begin_point:end_point])
70 |                 true_time[batch_idx, step] = n
71 |             else:
72 |                 final_data[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag[:, begin_point:end_point], sequence_length))
73 |                 true_time[batch_idx, step] = n
74 | 
75 |     final_data = np.transpose(final_data, (0, 1, 3, 2))
76 | 
77 |     return final_data, true_time, maximum_length
78 | 
79 | 
80 | def findMaxlen(data_vec):
81 |     max_ = 0
82 |     for each in data_vec:
83 |         if len(each) > max_:
84 |             max_ = len(each)
85 |     return max_
86 | 
87 | 
88 | # ----------------- Begin Vars --------------------- #
89 | 
90 | # Training data directories
91 | traindata = os.getcwd() + "/Training/NoiseAdded/"
92 | voicedata = os.getcwd() + "/Training/HumanVoices/"
93 | checkpoints = os.getcwd() + "/TF_Checkpoints/"
94 | 
95 | # NormConstant
96 | norm_factor = (1 / 32768.0)
97 | 
98 | # Spectrogram Parameters
99 | stft_size = 1024
100 | 
101 | # RNN Specs
102 | sequence_length = 100
103 | batch_size = 10
104 | learning_rate = 0.001
105 | epochs = 250
106 | # number_of_layers = 3
107 | 
108 | # Tensorflow vars + Graph and LSTM Params
109 | input_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
110 | clean_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
111 | sequence_length_tensor = tf.placeholder(tf.int32, [None])
112 | 
113 | # Temp data variables
114 | no_of_files = 0
115 | temp_list = []
116 | final_data = []
117 | sequence_length_id = 0
118 | 
119 | # Repositories
120 | file_repository = []
121 | rate_repository = []
122 | clean_repository = []
123 | 
124 | # Selected vectors
125 | files_vec = []
126 | clean_files_fin_vec = []
127 | clean_files_vec = []
128 | 
129 | # Graph
130 | lstm_cell = tf.contrib.rnn.BasicLSTMCell(stft_size, forget_bias = 1.0, state_is_tuple = True)
131 | # stacked_lstm = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(stft_size) for i in range(number_of_layers)])
132 | init_state = lstm_cell.zero_state(batch_size, tf.float32)
133 | rnn_outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, input_data, sequence_length=sequence_length_tensor, initial_state=init_state, time_major=False)
134 | mse_loss = tf.losses.mean_squared_error(rnn_outputs, clean_data)
135 | learning_rate_tensor = tf.placeholder(tf.float32, shape=[]) # Fed on every step, so the learning rate decay below actually reaches the optimizer.
136 | # train_optimizer = tf.train.AdagradOptimizer(learning_rate_tensor).minimize(mse_loss)
137 | # train_optimizer = tf.train.AdagradDAOptimizer(learning_rate_tensor).minimize(mse_loss)
138 | train_optimizer = tf.train.AdamOptimizer(learning_rate_tensor).minimize(mse_loss)
139 | saver = tf.train.Saver()
140 | 
141 | # ------------------- Read all data to memory creating a repository of mixture and clean files --------------------- #
142 | 
143 | os.chdir(traindata)
144 | # for file_iter in range(traindata):
145 | 
146 | # Buffer training data to memory for faster execution:
147 | for root, _, files in os.walk(traindata):
148 |     files = sorted(files)
149 |     no_of_files = len(files)
150 | 
151 |     if batch_size > no_of_files:
152 |         sys.exit("Error: batch_size cannot be more than number of files in the training directory")
153 | 
154 |     for f in files:
155 |         if f.endswith(".wav"):
156 |             temp_list.append(f)
157 |             srate, data = wav.read(os.path.join(root, f))
158 |             file_repository.append(data)
159 |             rate_repository.append(srate)
160 | 
161 | # Generate a vector of file names that are clean files
162 | clean_files_vec = list(map(formatFilename, temp_list))
163 | # clean_files_vec = list(map(None, *clean_files_vec))
164 | 
165 | # Find clean files that correspond to data in file_repository and buffer clean voice data to memory
166 | for root, _, files in os.walk(voicedata):
167 |     files = sorted(files)
168 |     for each in files:
169 |         if each.endswith(".wav"):
170 |             for name in clean_files_vec:
171 |                 if each == name:
172 |                     srate2, data2 = wav.read(os.path.join(root, name))
173 |                     clean_repository.append(data2)
174 | 
175 | # ------------------- Step 1: Prepare data in batches and perform STFTs --------------------- #
176 | 
177 | 
178 | # files_vec = []
179 | run_epochs = int((no_of_files / batch_size) * epochs)
180 | 
181 | # Initialize TF Graph
182 | init_op = tf.global_variables_initializer()
183 | gpu_options = tf.GPUOptions(allow_growth = True) # Let the session grow GPU memory as needed.
184 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
185 | sess.run(init_op)
186 | 
187 | globalBatchLossSum = 0 # Sum of all batch losses
188 | globalStepsSum = 0 # Sum of all steps
189 | lastCumulativeLossAvg = 100 # Last cumulative loss average; start high so the first comparison passes.
190 | 
191 | for idx in range(int(run_epochs)):
192 | 
193 |     files_vec = []
194 |     # clean_files_vec = []
195 |     clean_files_fin_vec = []
196 | 
197 |     # Select batch_size random files from file_repository and the corresponding clean files
198 |     for file_iter in range(batch_size):
199 |         i = random.randint(0, len(file_repository) - 1)
200 |         files_vec.append(file_repository[i] * norm_factor)
201 |         clean_files_fin_vec.append(clean_repository[i] * norm_factor)
202 | 
203 |     stft_batch, sequence_length_id, maximum_length = sequentialized_spectrum(files_vec)
204 |     clean_voice_batch, sequence_length_id_clean, maximum_length_clean = sequentialized_spectrum(clean_files_fin_vec)
205 | 
206 |     # ------------------- Step 2: Feed Data to Placeholders, and then, Initialise, Train and Save the Graph --------------------- #
207 | 
208 |     max_time_steps = stft_batch.shape[1]
209 |     batchLossSum = 0 # Sum of batch losses in one index.
210 | 
211 |     for time_seq in range(max_time_steps):
212 |         feed_dict = {
213 |             input_data: stft_batch[:, time_seq, :, :],
214 |             clean_data: clean_voice_batch[:, time_seq, :, :],
215 |             sequence_length_tensor: sequence_length_id[:, time_seq],
216 |             learning_rate_tensor: learning_rate}
217 |         _, loss_value, final_state_value, rnn_outputs_val = sess.run([train_optimizer, mse_loss, final_state, rnn_outputs], feed_dict=feed_dict)
218 | 
219 |         # print("Index " + str(idx + 1) + " in " + str(run_epochs))
220 |         # print("\tOutput Min:\t" + str(np.min(rnn_outputs_val)))
221 |         # print("\tClean Min:\t" + str(np.min(clean_voice_batch[:, time_seq, :, :])))
222 |         # print("\tOutput Max:\t" + str(np.max(rnn_outputs_val)))
223 |         # print("\tClean Max:\t" + str(np.max(clean_voice_batch[:, time_seq, :, :])))
224 |         # print("\tBatch Loss:\t" + str(loss_value * 32768)) # Multiply by 32768 to put the batch loss back on the int16 scale.
225 |         batchLossSum = batchLossSum + loss_value
226 | 
227 |     print("\t\tIndex " + str(idx + 1) + " Batch Loss Avg:\t" + str(batchLossSum / max_time_steps / norm_factor) + "\n")
228 | 
229 |     globalBatchLossSum = globalBatchLossSum + batchLossSum
230 |     globalStepsSum = globalStepsSum + max_time_steps
231 | 
232 |     if (int((idx + 1) % no_of_files) == 0):
233 |         # Divide the sum of all batch losses by the global step count to get the average
234 |         cumulativLossAvg = globalBatchLossSum / globalStepsSum
235 |         print("\n\t\tCumulative epochs loss Avg in latest " + str(idx + 1) + " indexes:\t" + str(cumulativLossAvg / norm_factor))
236 |         if(cumulativLossAvg <= lastCumulativeLossAvg):
237 |             lastCumulativeLossAvg = cumulativLossAvg # The loss is still decreasing: keep the learning rate.
238 |         else:
239 |             learning_rate = learning_rate / 5 # The loss went up: cut the learning rate to 1/5 (fed to the graph on the next step).
240 |             lastCumulativeLossAvg = cumulativLossAvg
241 |             print("\n\t\tLearning Rate changed to: " + str(learning_rate))
242 |         globalBatchLossSum = 0 # Reset to 0 for the next round of batch loss accumulation
243 |         globalStepsSum = 0
244 | 
245 |         os.chdir(checkpoints)
246 |         saver.save(sess, './ssep_model.ckpt', global_step=idx)
247 |         print("\t\tSaved checkpoint\n")
248 |         os.chdir(traindata)
249 | 
250 | os.chdir(checkpoints)
251 | saver.save(sess, './FINAL.ckpt')
252 | print("Saved FINAL")
253 | sess.close()
--------------------------------------------------------------------------------
/02.GRUTraining/GRUTraining.py:
--------------------------------------------------------------------------------
1 | # Code By adityatb at https://github.com/adityatb/noise-reduction-using-rnn
2 | # GRU method.
3 | # Maintained by ShYy, 2018.
4 | 
5 | import scipy
6 | import scipy.signal as signal
7 | import numpy as np
8 | import os, random, sys
9 | import scipy.io.wavfile as wav
10 | import tensorflow as tf
11 | import math
12 | 
13 | 
14 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
15 | 
16 | # Strip away the _xnoise.wav part of the filename, and append _voice.wav to obtain the clean voice counterpart
17 | def formatFilename(filename):
18 |     return filename[:len(filename) - 11] + "_voice.wav"
19 | 
20 | 
21 | 
22 | def create_final_sequence(sequence, max_length):
23 |     a, b = sequence.shape
24 |     extra_len = max_length - b
25 |     null_mat = np.zeros((len(sequence), extra_len), dtype=np.float32)
26 |     sequence = np.concatenate((sequence, null_mat), axis=1)
27 |     return sequence
28 | 
29 | 
30 | def sequentialized_spectrum(batch):
31 |     # Get maximum length of batch
32 |     t = []
33 |     t_vec = []
34 |     Sxx_Vec = []
35 |     for each in batch:
36 |         _, t, Sxx_Vec_Temp = signal.stft(each, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
37 |         t_vec.append(t)
38 |         Sxx_Vec.append(Sxx_Vec_Temp)
39 |     maximum_length = findMaxlen(t_vec)
40 | 
41 |     max_run_total = int(math.ceil(float(maximum_length) / sequence_length))
42 |     final_data = np.zeros([len(batch), max_run_total, stft_size, sequence_length], dtype=np.float32)
43 |     true_time = np.zeros([len(batch), max_run_total], dtype=np.int32)
44 | 
45 |     # Read in a file and compute its spectrum
46 |     # for batch_idx, each_set in enumerate(batch):
47 |     for batch_idx, Sxx in enumerate(Sxx_Vec):
48 |         # f, t, Sxx = signal.stft(each_set, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)
49 | 
50 |         # Magnitude and Phase Spectra
51 |         # Mag = Sxx.real
52 |         Mag = Sxx.imag # Train on the imaginary part of Sxx (run once with .real and once with .imag to get the two models GRUModelTest.py expects).
53 |         t = t_vec[batch_idx]
54 | 
55 |         # # TESTING
56 |         # _, outputData_ISTFT = signal.istft(Mag, fs=rate_repository[0], nperseg=stft_size,
57 |         #                                    input_onesided=False)
58 |         #
59 |         # outputData_ISTFT = ((outputData_ISTFT / norm_factor).real) / 0.75
60 |         # outputData_ISTFT = outputData_ISTFT.astype(np.int16)
61 |         #
62 |         # wav.write("0.TEST_REAL.wav", rate_repository[idx], outputData_ISTFT)
63 |         #
64 |         # _, outputData_ISTFT = signal.istft(Sxx, fs=rate_repository[0], nperseg=stft_size,
65 |         #                                    input_onesided=False)
66 |         #
67 |         # outputData_ISTFT = ((outputData_ISTFT / norm_factor).real) / 0.75
68 |         # outputData_ISTFT = outputData_ISTFT.astype(np.int16)
69 |         #
70 |         # wav.write("0.TEST_ORIG.wav", rate_repository[idx], outputData_ISTFT)
71 |         # # TESTING END
72 | 
73 |         # Break up the spectrum into sequence_length sized chunks
74 |         run_full_steps = float(len(t)) / sequence_length
75 |         run_total = int(math.ceil(run_full_steps))
76 | 
77 |         # Run a loop long enough to break up all the data in the file into chunks of sequence_length
78 |         for step in range(run_total):
79 | 
80 |             begin_point = step * sequence_length
81 |             end_point = begin_point + sequence_length
82 | 
83 |             m, n = Mag[:, begin_point:end_point].shape
84 | 
85 |             # Store each chunk sequentially in a new array, zero padding the last chunk of the file
86 |             if n == sequence_length:
87 |                 final_data[batch_idx, step, :, :] = np.copy(Mag[:, begin_point:end_point])
88 |                 true_time[batch_idx, step] = n
89 |             else:
90 |                 final_data[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag[:, begin_point:end_point], sequence_length))
91 |                 true_time[batch_idx, step] = n
92 | 
93 |     final_data = np.transpose(final_data, (0, 1, 3, 2))
94 | 
95 |     return final_data, true_time, maximum_length
96 | 
97 | 
98 | def findMaxlen(data_vec):
99 |     max_ = 0
100 |     for each in data_vec:
101 |         if len(each) > max_:
102 |             max_ = len(each)
103 |     return max_
104 | 
105 | 
106 | # ----------------- Begin Vars --------------------- #
107 | 
108 | # Training data directories
109 | traindata = os.getcwd() + "/Training/NoiseAdded/"
110 | voicedata = os.getcwd() + "/Training/HumanVoices/"
111 | checkpoints = os.getcwd() + "/TF_Checkpoints/"
112 | 
113 | # NormConstant
114 | norm_factor = (1 / 32768.0)
115 | 
116 | # Spectrogram Parameters
117 | stft_size = 1024
118 | 
119 | # RNN Specs
120 | sequence_length = 100
121 | batch_size = 10
122 | learning_rate = 0.0005
123 | epochs = 250
124 | number_of_layers = 3
125 | 
126 | # Tensorflow vars + Graph and GRU Params
127 | input_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
128 | clean_data = tf.placeholder(tf.float32, [None, sequence_length, stft_size])
129 | sequence_length_tensor = tf.placeholder(tf.int32, [None])
130 | 
131 | # Temp data variables
132 | no_of_files = 0
133 | temp_list = []
134 | final_data = []
135 | sequence_length_id = 0
136 | 
137 | # Repositories
138 | file_repository = []
139 | rate_repository = []
140 | clean_repository = []
141 | 
142 | # Selected vectors
143 | files_vec = []
144 | clean_files_fin_vec = []
145 | clean_files_vec = []
146 | 
147 | # Graph
148 | gru_cells = [tf.contrib.rnn.GRUCell(stft_size, kernel_initializer = tf.zeros_initializer(dtype = tf.float32)) for _ in range(number_of_layers)] # One cell object per layer; a single shared instance would tie the layers' weights together.
149 | gru_cells = [tf.contrib.rnn.DropoutWrapper(cell, dtype = tf.float32, output_keep_prob = 0.5) for cell in gru_cells]
150 | stacked_gru = tf.contrib.rnn.MultiRNNCell(gru_cells, state_is_tuple=True)
151 | init_state = stacked_gru.zero_state(batch_size, dtype=tf.float32)
152 | rnn_outputs, final_state = tf.nn.dynamic_rnn(stacked_gru, input_data, sequence_length=sequence_length_tensor, initial_state=init_state, time_major=False)
153 | mse_loss = tf.losses.mean_squared_error(rnn_outputs, clean_data)
154 | learning_rate_tensor = tf.placeholder(tf.float32, shape=[]) # Fed on every step, so the learning rate decay below actually reaches the optimizer.
155 | # train_optimizer = tf.train.AdagradOptimizer(learning_rate_tensor).minimize(mse_loss)
156 | # train_optimizer = tf.train.AdagradDAOptimizer(learning_rate_tensor).minimize(mse_loss)
157 | train_optimizer = tf.train.AdamOptimizer(learning_rate_tensor).minimize(mse_loss)
158 | saver = tf.train.Saver()
159 | 
160 | # ------------------- Read all data to memory creating a repository of mixture and clean files --------------------- #
161 | 
162 | os.chdir(traindata)
163 | # for file_iter in range(traindata):
164 | 
165 | # Buffer training data to memory for faster execution:
166 | for root, _, files in os.walk(traindata):
167 |     files = sorted(files)
168 |     no_of_files = len(files)
169 | 
170 |     if batch_size > no_of_files:
171 |         sys.exit("Error: batch_size cannot be more than number of files in the training directory")
172 | 
173 |     for f in files:
174 |         if f.endswith(".wav"):
175 |             temp_list.append(f)
176 |             srate, data = wav.read(os.path.join(root, f))
177 |             file_repository.append(data)
178 |             rate_repository.append(srate)
179 | 
180 | # Generate a vector of file names that are clean files
181 | clean_files_vec = list(map(formatFilename, temp_list))
182 | # clean_files_vec = list(map(None, *clean_files_vec))
183 | 
184 | # Find clean files that correspond to data in file_repository and buffer clean voice data to memory
185 | for root, _, files in os.walk(voicedata):
186 |     files = sorted(files)
187 |     for each in files:
188 |         if each.endswith(".wav"):
189 |             for name in clean_files_vec:
190 |                 if each == name:
191 |                     srate2, data2 = wav.read(os.path.join(root, name))
192 |                     # In CreateNoiseAddDataset the noise-added audio is 0.75*Source + 0.25*Noise,
193 |                     # so scale the clean target by 0.75 to match.
194 |                     clean_repository.append(data2 * 0.75)
195 | 
196 | # ------------------- Step 1: Prepare data in batches and perform STFTs --------------------- #
197 | 
198 | 
199 | # files_vec = []
200 | run_epochs = int((no_of_files / batch_size) * epochs)
201 | 
202 | # Initialize TF Graph
203 | init_op = tf.global_variables_initializer()
204 | gpu_options = tf.GPUOptions(allow_growth = True) # Let the session grow GPU memory as needed.
205 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
206 | sess.run(init_op)
207 | 
208 | globalBatchLossSum = 0 # Sum of all batch losses
209 | # globalStepsSum = 0 # Sum of all steps
210 | lastCumulativeLossSum = 99999 # Last cumulative loss sum; start high so the first comparison passes.
211 | 
212 | for idx in range(int(run_epochs)):
213 | 
214 |     files_vec = []
215 |     # clean_files_vec = []
216 |     clean_files_fin_vec = []
217 | 
218 |     # Select batch_size random files from file_repository and the corresponding clean files
219 |     for file_iter in range(batch_size):
220 |         i = random.randint(0, len(file_repository) - 1)
221 |         files_vec.append(file_repository[i] * norm_factor)
222 |         clean_files_fin_vec.append(clean_repository[i] * norm_factor)
223 | 
224 |     stft_batch, sequence_length_id, maximum_length = sequentialized_spectrum(files_vec)
225 |     clean_voice_batch, sequence_length_id_clean, maximum_length_clean = sequentialized_spectrum(clean_files_fin_vec)
226 | 
227 |     # ------------------- Step 2: Feed Data to Placeholders, and then, Initialise, Train and Save the Graph --------------------- #
228 | 
229 |     max_time_steps = stft_batch.shape[1]
230 |     batchLossSum = 0 # Sum of batch losses in one index.
231 | 
232 |     for time_seq in range(max_time_steps):
233 |         feed_dict = {
234 |             input_data: stft_batch[:, time_seq, :, :],
235 |             clean_data: clean_voice_batch[:, time_seq, :, :],
236 |             sequence_length_tensor: sequence_length_id[:, time_seq],
237 |             learning_rate_tensor: learning_rate}
238 |         _, loss_value, final_state_value, rnn_outputs_val = sess.run([train_optimizer, mse_loss, final_state, rnn_outputs], feed_dict=feed_dict)
239 | 
240 |         # print("Index " + str(idx + 1) + " in " + str(run_epochs))
241 |         # print("\tOutput Min:\t" + str(np.min(rnn_outputs_val)))
242 |         # print("\tClean Min:\t" + str(np.min(clean_voice_batch[:, time_seq, :, :])))
243 |         # print("\tOutput Max:\t" + str(np.max(rnn_outputs_val)))
244 |         # print("\tClean Max:\t" + str(np.max(clean_voice_batch[:, time_seq, :, :])))
245 |         # print("\tBatch Loss:\t" + str(loss_value * 32768)) # Multiply by 32768 to put the batch loss back on the int16 scale.
246 |         batchLossSum = batchLossSum + loss_value
247 | 
248 |     print("Index " + str(idx + 1) + "/" + str(run_epochs) + " Batch Loss Sum:\t" + str(batchLossSum / norm_factor) + "\n")
249 | 
250 |     globalBatchLossSum = globalBatchLossSum + batchLossSum
251 |     # globalStepsSum = globalStepsSum + max_time_steps
252 | 
253 |     if (int((idx + 1) % no_of_files) == 0):
254 |         # Compare the cumulative loss sum of this round against the previous round
255 |         # cumulativLossAvg = globalBatchLossSum / globalStepsSum
256 |         cumulativeLossSum = globalBatchLossSum
257 |         print("\n\t\tCumulative epochs loss Sum in latest " + str(no_of_files) + " indexes:\t" + str(cumulativeLossSum / norm_factor))
258 |         if(cumulativeLossSum < lastCumulativeLossSum):
259 |             lastCumulativeLossSum = cumulativeLossSum # The loss is still decreasing: keep the learning rate.
260 |         else:
261 |             learning_rate = learning_rate / 5 # The loss went up: cut the learning rate to 1/5 (fed to the graph on the next step).
262 |             lastCumulativeLossSum = cumulativeLossSum
263 |             print("\n\t\tLearning Rate changed to: " + str(learning_rate))
264 |         globalBatchLossSum = 0 # Reset to 0 for the next round of batch loss accumulation
265 |         # globalStepsSum = 0
266 | 
267 |         os.chdir(checkpoints)
268 |         saver.save(sess, './ssep_model.ckpt', global_step=idx)
269 |         print("\t\tSaved checkpoint\n")
270 |         os.chdir(traindata)
271 | 
272 | os.chdir(checkpoints)
273 | saver.save(sess, './FINAL.ckpt') # Rename the result to FINAL_Real.ckpt or FINAL_Imag.ckpt to match the part you trained; GRUModelTest.py expects those two names.
274 | print("Saved FINAL")
275 | sess.close()
--------------------------------------------------------------------------------