├── neural_net ├── __init__.py ├── keras_tcn │ ├── __init__.py │ ├── adding_problem │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── README.md │ │ └── main.py │ ├── copy_memory │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── README.md │ │ └── main.py │ ├── mnist_pixel │ │ ├── __init__.py │ │ ├── utils.py │ │ └── main.py │ ├── tcn │ │ ├── __init__.py │ │ └── tcn.py │ ├── misc │ │ ├── Adding_Task.png │ │ ├── Dilated_Conv.png │ │ ├── Copy_Memory_Task.png │ │ └── Sequential_MNIST_Task.png │ ├── setup.py │ ├── LICENSE │ ├── .gitignore │ └── README.md ├── onsetSegmentEval │ ├── __init__.py │ └── evaluation.py ├── data │ ├── normal_jianzi.json │ ├── normal_special.json │ ├── mispronunciation_filelist_test.csv │ └── mispronunciation_filelist_train.csv ├── model │ └── segmentation │ │ └── jan_joint0.h5 ├── utils │ ├── utils_functions.py │ ├── csv_preprocessing.py │ ├── textgrid_preprocessing.py │ ├── audio_preprocessing.py │ └── textgridParser.py ├── training_scripts │ ├── generator.py │ ├── models_TCN.py │ ├── attention.py │ ├── hpc_code │ │ ├── train_run_jianzi_tcn.py │ │ ├── train_run_special_tcn.py │ │ ├── train_run_jianzi.py │ │ └── train_run_special.py │ └── models_RNN.py ├── plot_code.py ├── parameters.py ├── combine_feature_label.py ├── normal_pronunciation.py ├── file_path.py ├── viterbiDecodingPhonemeSeg.pyx └── training_sample_collection_syllable.py ├── kaldi_alignment ├── data │ ├── dict │ │ ├── silence_phones.txt │ │ ├── optional_silence.txt │ │ └── nonsilence_phones.txt │ ├── dict_test │ │ ├── silence_phones.txt │ │ ├── optional_silence.txt │ │ └── nonsilence_phones.txt │ ├── dev │ │ ├── reco2file_and_channel │ │ ├── wav.scp │ │ ├── spk2utt │ │ └── utt2spk │ ├── test │ │ ├── reco2file_and_channel │ │ └── wav.scp │ └── train │ │ └── reco2file_and_channel ├── conf │ └── mfcc.conf ├── cmd.sh ├── header.txt ├── path.sh ├── srcPy │ ├── csv_prepossessing.py │ ├── parse_decoded_pronunciation.py │ ├── mispronunciation_filelist_test.csv │ ├── filePath.py │ ├── mispron_eval.py │ ├── textgridParser.py │ ├── mispronunciation_filelist_train.csv │ └── parseLang.py ├── id2phone.R ├── ali2Phones.sh ├── splitAlignments.py ├── run.sh ├── createtextgridDan.praat └── createtextgridLaosheng.praat ├── .idea ├── libraries │ └── R_User_Library.xml ├── codeStyles │ └── codeStyleConfig.xml └── inspectionProfiles │ └── Project_Default.xml ├── .gitignore └── README.md /neural_net/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_net/onsetSegmentEval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/adding_problem/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/copy_memory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/mnist_pixel/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /kaldi_alignment/data/dict/silence_phones.txt: -------------------------------------------------------------------------------- 1 | sil 2 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dict/optional_silence.txt: -------------------------------------------------------------------------------- 1 | sil 2 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dict_test/silence_phones.txt: -------------------------------------------------------------------------------- 1 | sil 2 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dict_test/optional_silence.txt: -------------------------------------------------------------------------------- 1 | sil 2 | -------------------------------------------------------------------------------- /kaldi_alignment/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=true 2 | --sample-frequency=44100 3 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/tcn/__init__.py: -------------------------------------------------------------------------------- 1 | from neural_net.keras_tcn.tcn import tcn 2 | -------------------------------------------------------------------------------- /kaldi_alignment/cmd.sh: -------------------------------------------------------------------------------- 1 | train_cmd="run.pl --mem 6G" 2 | decode_cmd="run.pl --mem 6G" 3 | -------------------------------------------------------------------------------- /kaldi_alignment/header.txt: -------------------------------------------------------------------------------- 1 | file_utt file id ali startinutt dur phone start_utt end_utt start end 2 | -------------------------------------------------------------------------------- /neural_net/data/normal_jianzi.json: -------------------------------------------------------------------------------- 1 | ["xiu", "jue", "xian", "xiang", "zheng", "xin", "qiu", "qie", "chu", "qing", "ji", "xiao", "qian", "xi"] -------------------------------------------------------------------------------- /neural_net/keras_tcn/misc/Adding_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ronggong/mispronunciation-detection/HEAD/neural_net/keras_tcn/misc/Adding_Task.png -------------------------------------------------------------------------------- /neural_net/keras_tcn/misc/Dilated_Conv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ronggong/mispronunciation-detection/HEAD/neural_net/keras_tcn/misc/Dilated_Conv.png -------------------------------------------------------------------------------- /neural_net/model/segmentation/jan_joint0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ronggong/mispronunciation-detection/HEAD/neural_net/model/segmentation/jan_joint0.h5 -------------------------------------------------------------------------------- /neural_net/keras_tcn/misc/Copy_Memory_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ronggong/mispronunciation-detection/HEAD/neural_net/keras_tcn/misc/Copy_Memory_Task.png 
-------------------------------------------------------------------------------- /neural_net/keras_tcn/misc/Sequential_MNIST_Task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ronggong/mispronunciation-detection/HEAD/neural_net/keras_tcn/misc/Sequential_MNIST_Task.png -------------------------------------------------------------------------------- /.idea/libraries/R_User_Library.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='keras-tcn', 5 | version='1.4.0', 6 | description='Keras TCN', 7 | author='Philippe Remy', 8 | license='MIT', 9 | packages=['tcn'], 10 | install_requires=['tensorflow-gpu', 'numpy'] 11 | ) 12 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dict/nonsilence_phones.txt: -------------------------------------------------------------------------------- 1 | ? 2 | @ 3 | 1 4 | 7 5 | 7N 6 | 9 7 | a 8 | aI^ 9 | an 10 | AN 11 | AU^ 12 | c 13 | E 14 | eI^ 15 | En 16 | f 17 | H 18 | i 19 | in 20 | iN 21 | j 22 | k 23 | l 24 | m 25 | M 26 | n 27 | @n 28 | N 29 | o 30 | O 31 | oU^ 32 | r\' 33 | u 34 | UN 35 | w 36 | x 37 | y 38 | yn 39 | sil_phone 40 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dict_test/nonsilence_phones.txt: -------------------------------------------------------------------------------- 1 | ? 2 | @ 3 | 1 4 | 7 5 | 7N 6 | 9 7 | a 8 | aI^ 9 | an 10 | AN 11 | AU^ 12 | c 13 | E 14 | eI^ 15 | En 16 | f 17 | H 18 | i 19 | in 20 | iN 21 | j 22 | k 23 | l 24 | m 25 | M 26 | n 27 | @n 28 | N 29 | o 30 | O 31 | oU^ 32 | r\' 33 | u 34 | UN 35 | w 36 | x 37 | y 38 | yn 39 | sil_phone 40 | -------------------------------------------------------------------------------- /neural_net/utils/utils_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def smooth_obs(obs): 5 | """ 6 | hanning window smooth the onset observation function 7 | :param obs: syllable/phoneme onset function 8 | :return: 9 | """ 10 | hann = np.hanning(5) 11 | hann /= np.sum(hann) 12 | 13 | obs = np.convolve(hann, obs, mode='same') 14 | 15 | return obs -------------------------------------------------------------------------------- /kaldi_alignment/path.sh: -------------------------------------------------------------------------------- 1 | export KALDI_ROOT=/Users/ronggong/Documents_using/github/kaldi 2 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh 3 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH 4 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 5 | . 
$KALDI_ROOT/tools/config/common_path.sh 6 | export LC_ALL=C 7 | -------------------------------------------------------------------------------- /neural_net/data/normal_special.json: -------------------------------------------------------------------------------- 1 | ["na", "ai", "ri", "zhi", "ru", "shuo", "zhuang", "ming", "bei", "peng", "nei", "lai", "bai", "chang", "he", "cheng", "lei", "neng", "zhu", "jing", "shi", "lv", "me", "ding", "sheng", "wu", "xing", "qing", "hai", "shu", "ting", "ping", "meng", "ge", "quan", "an", "zheng", "bing", "ying", "e", "wo", "jie", "chu", "ke", "ceng", "que", "ji", "mao", "zei", "chun", "ling", "yuan", "fei", "ning", "deng", "zeng", "mai", "xie", "zhan", "zhao", "feng", "huai", "luo", "zhe"] -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | -------------------------------------------------------------------------------- /neural_net/utils/csv_preprocessing.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | def open_csv_recordings(filename): 5 | recordings = [] 6 | with open(filename) as csvfile: 7 | readCSV = csv.reader(csvfile, delimiter=',') 8 | for row in readCSV: 9 | recordings.append(row) 10 | return recordings 11 | 12 | 13 | def write_csv_two_columns_list(two_columns_list, filename): 14 | with open(filename, 'wb') as csvfile: 15 | two_columns_writer = csv.writer(csvfile, delimiter=',') 16 | for l in two_columns_list: 17 | two_columns_writer.writerow(l) 18 | -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/csv_prepossessing.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | def open_csv_recordings(filename): 5 | recordings = [] 6 | with open(filename) as csvfile: 7 | readCSV = csv.reader(csvfile, delimiter=',') 8 | for row in readCSV: 9 | recordings.append(row) 10 | return recordings 11 | 12 | 13 | def write_csv_two_columns_list(two_columns_list, filename): 14 | with open(filename, 'wb') as csvfile: 15 | two_columns_writer = csv.writer(csvfile, delimiter=',') 16 | for l in two_columns_list: 17 | two_columns_writer.writerow(l) 18 | -------------------------------------------------------------------------------- /neural_net/training_scripts/generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def generator_batch1(list_feature, labels, scaler, shuffle=True): 5 | """data generator""" 6 | ii = 0 7 | while True: 8 | if scaler: 9 | fea = scaler.transform(list_feature[ii]) 10 | else: 11 | fea = list_feature[ii] 12 | 13 | fea = np.expand_dims(fea, axis=0) 14 | lab = np.expand_dims(labels[ii], axis=0) 15 | 16 | yield fea, lab 17 | 18 | ii += 1 19 | 20 | if ii >= len(list_feature): 21 | ii = 0 22 | if shuffle: 23 | p = np.random.permutation(len(list_feature)) 24 | list_feature = [list_feature[ii_p] for ii_p in p] 25 | labels = labels[p] # labels is a numpy array -------------------------------------------------------------------------------- /neural_net/keras_tcn/adding_problem/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def data_generator(n, seq_length): 5 | """ 6 | Args: 7 | seq_length: Length of the adding problem data 8 | n: # of data in the set 9 | """ 
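    # (added notes) dimension 1 of x holds the random values and dimension 2
    # the 0/1 mask marking the two positions to sum; after the transpose
    # below, x has shape (n, seq_length, 2) and y has shape (n, 1).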
10 |     x_num = np.random.uniform(0, 1, (n, 1, seq_length))
11 |     x_mask = np.zeros([n, 1, seq_length])
12 |     y = np.zeros([n, 1])
13 |     for i in range(n):
14 |         positions = np.random.choice(seq_length, size=2, replace=False)
15 |         x_mask[i, 0, positions[0]] = 1
16 |         x_mask[i, 0, positions[1]] = 1
17 |         y[i, 0] = x_num[i, 0, positions[0]] + x_num[i, 0, positions[1]]
18 |     x = np.concatenate((x_num, x_mask), axis=1)
19 |     x = np.transpose(x, (0, 2, 1))
20 |     return x, y
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     print(data_generator(n=20, seq_length=10))
25 | 
--------------------------------------------------------------------------------
/neural_net/keras_tcn/adding_problem/README.md:
--------------------------------------------------------------------------------
 1 | ## The Adding Problem
 2 | 
 3 | ### Overview
 4 | 
 5 | In this task, each input consists of a length-T sequence of depth 2, with all values in dimension 1
 6 | chosen randomly in [0, 1]. The second dimension consists of all zeros except for
 7 | two elements, which are marked by 1. The objective is to sum the two random values whose second
 8 | dimensions are marked by 1. One can think of this as computing the dot product of two dimensions.
 9 | 
10 | Simply predicting the sum to be 1 should give an MSE of about 0.1767.
11 | 
12 | ### Data Generation
13 | 
14 | See `data_generator` in `utils.py`.
15 | 
16 | ### Note
17 | 
18 | Because a TCN's receptive field depends on the depth of the network and the filter size, we need
19 | to make sure the model we use can cover the sequence length T.
20 | 
21 | From: https://github.com/locuslab/TCN/
--------------------------------------------------------------------------------
/neural_net/keras_tcn/mnist_pixel/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from keras.datasets import mnist
 3 | from keras.utils import to_categorical
 4 | 
 5 | 
 6 | def data_generator():
 7 |     # input image dimensions
 8 |     img_rows, img_cols = 28, 28
 9 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
10 |     x_train = x_train.reshape(-1, img_rows * img_cols, 1)
11 |     x_test = x_test.reshape(-1, img_rows * img_cols, 1)
12 | 
13 |     num_classes = 10
14 |     y_train = to_categorical(y_train, num_classes)
15 |     y_test = to_categorical(y_test, num_classes)
16 | 
17 |     y_train = np.expand_dims(y_train, axis=2)
18 |     y_test = np.expand_dims(y_test, axis=2)
19 | 
20 |     x_train = x_train.astype('float32')
21 |     x_test = x_test.astype('float32')
22 |     x_train /= 255
23 |     x_test /= 255
24 | 
25 |     return (x_train, y_train), (x_test, y_test)
26 | 
27 | 
28 | if __name__ == '__main__':
29 |     print(data_generator())
30 | 
--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def data_generator(t, mem_length, b_size):
 5 |     """
 6 |     Generate data for the copying memory task
 7 |     :param t: The total blank time length
 8 |     :param mem_length: The length of the memory to be recalled
 9 |     :param b_size: The batch size
10 |     :return: Input and target data tensor
11 |     """
12 |     seq = np.array(np.random.randint(1, 9, size=(b_size, mem_length)), dtype=float)
13 |     zeros = np.zeros((b_size, t))
14 |     marker = 9 * np.ones((b_size, mem_length + 1))
15 |     placeholders = np.zeros((b_size, mem_length))
16 | 
17 |     x = np.array(np.concatenate((seq, zeros[:, :-1], marker), 1), dtype=int)
18 |     y = 
np.array(np.concatenate((placeholders, zeros, seq), 1), dtype=int)
19 |     return np.expand_dims(x, axis=2), np.expand_dims(y, axis=2)
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     print(data_generator(t=601, mem_length=10, b_size=1)[0].flatten())
24 | 
--------------------------------------------------------------------------------
/neural_net/utils/textgrid_preprocessing.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from neural_net.utils.textgridParser import textGrid2WordList
 3 | from neural_net.utils.textgridParser import wordListsParseByLines
 4 | 
 5 | 
 6 | def parse_syllable_line_list(ground_truth_text_grid_file, parent_tier, child_tier):
 7 | 
 8 |     if not os.path.isfile(ground_truth_text_grid_file):
 9 |         is_file_exist = False
10 |         return False, is_file_exist, False
11 |     else:
12 |         is_file_exist = True
13 | 
14 |     # parse line
15 |     line_list, _ = textGrid2WordList(ground_truth_text_grid_file, whichTier=parent_tier)
16 | 
17 |     # parse syllable
18 |     syllable_list, is_syllable_found = textGrid2WordList(ground_truth_text_grid_file, whichTier=child_tier)
19 | 
20 |     # parse lines of ground truth
21 |     nested_syllable_lists, _, _ = wordListsParseByLines(line_list, syllable_list)
22 | 
23 |     return nested_syllable_lists, is_file_exist, is_syllable_found
24 | 
--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/README.md:
--------------------------------------------------------------------------------
 1 | ## Copying Memory Task
 2 | 
 3 | ### Overview
 4 | 
 5 | In this task, each input sequence has length T+20. The first 10 values are chosen randomly
 6 | among the digits 1-8, with the rest being all zeros, except for the last 11 entries that are
 7 | filled with the digit ‘9’ (the first ‘9’ is a delimiter). The goal is to generate an output
 8 | of the same length that is zero everywhere, except for the last 10 values after the delimiter, where
 9 | the model is expected to repeat the 10 values it encountered at the start of the input.
10 | 
11 | ### Data Generation
12 | 
13 | See `data_generator` in `utils.py`.
14 | 
15 | ### Note
16 | 
17 | - Because a TCN's receptive field depends on the depth of the network and the filter size, we need
18 |   to make sure the model we use can cover the sequence length T+20 (see the sketch below).
19 | 
20 | - Using the `--seq_len` flag, one can change the # of values to recall (the typical setup is 10).
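A minimal sketch of that coverage check, assuming each residual block widens the
receptive field by roughly (kernel_size - 1) * dilation (the exact constant
depends on the implementation):

```python
# Rough receptive-field estimate for the configuration used in main.py.
kernel_size = 8
dilatations = [1, 2, 4, 8]
nb_stacks = 8
receptive_field = 1 + nb_stacks * sum((kernel_size - 1) * d for d in dilatations)
print(receptive_field)  # 841, which covers T + 20 = 621 for the T = 601 setup
```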
21 | 
22 | From: https://github.com/locuslab/TCN/
--------------------------------------------------------------------------------
/neural_net/plot_code.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.style
 3 | import matplotlib as mpl
 4 | mpl.style.use('classic')
 5 | import matplotlib.pyplot as plt
 6 | 
 7 | fontsize = 15
 8 | 
 9 | 
10 | def plot_spectro_att(mfcc0,
11 |                      att_vector,
12 |                      hopsize_t,
13 |                      filename_save):
14 | 
15 |     plt.figure(figsize=(16, 6))
16 | 
17 |     ax1 = plt.subplot(2, 1, 1)
18 |     y = np.arange(0, 80)
19 |     x = np.arange(0, mfcc0.shape[0]) * hopsize_t
20 |     plt.pcolormesh(x, y, np.transpose(mfcc0))
21 | 
22 |     ax1.set_ylabel('Syllable\nlog-mel spectro', fontsize=fontsize)
23 |     ax1.axis('tight')
24 | 
25 |     ax2 = plt.subplot(2, 1, 2)
26 |     x = np.arange(0, len(att_vector)) * hopsize_t
27 |     plt.plot(x, att_vector)
28 | 
29 |     ax2.set_ylabel('Attention\nvector', fontsize=fontsize)
30 |     ax2.axis('tight')
31 |     plt.xlabel('time (s)')
32 | 
33 |     plt.savefig(filename_save, bbox_inches='tight')
34 | 
35 |     # plt.show()
--------------------------------------------------------------------------------
/kaldi_alignment/id2phone.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | # id2phone.R
 4 | #
 5 | #
 6 | # Created by Eleanor Chodroff on 3/24/15.
 7 | #
 8 | 
 9 | phones <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/lang/phones.txt", quote="\"")
10 | segments <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/train/segments", quote="\"")
11 | ctm <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_ali/merged_alignment.txt", quote="\"")
12 | 
13 | names(ctm) <- c("file_utt","utt","start","dur","id")
14 | ctm$file <- gsub("_[0-9]*$","",ctm$file_utt)
15 | names(phones) <- c("phone","id")
16 | names(segments) <- c("file_utt","file","start_utt","end_utt")
17 | 
18 | ctm2 <- merge(ctm, phones, by="id")
19 | ctm3 <- merge(ctm2, segments, by=c("file_utt","file"))
20 | ctm3$start_real <- ctm3$start + ctm3$start_utt
21 | ctm3$end_real <- ctm3$start_real + ctm3$dur
22 | 
23 | write.table(ctm3, "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_ali/final_ali.txt", row.names=F, quote=F, sep="\t")
--------------------------------------------------------------------------------
/neural_net/keras_tcn/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Philippe Rémy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /kaldi_alignment/ali2Phones.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #for i in exp/mono_ali/ali.*.gz; do 4 | # src/bin/ali-to-phones --ctm-output exp/mono_ali/final.mdl ark:"gunzip -c $i|" -> ${i%.gz}.ctm; 5 | #done; 6 | # 7 | #cd exp/mono_ali 8 | # 9 | #cat *.ctm > merged_alignment.txt 10 | # 11 | #cd ../.. 12 | # 13 | #R -f id2phone.R 14 | # 15 | #python splitAlignments.py 16 | # 17 | #cd ~/Desktop 18 | #rm -rf tmp && mkdir tmp || exit 1; 19 | # 20 | #header="/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/header.txt" 21 | # 22 | ## direct the terminal to the directory with the newly split session files 23 | ## ensure that the RegEx below will capture only the session files 24 | ## otherwise change this or move the other .txt files to a different folder 25 | # 26 | #for x in laosheng dan;do 27 | # cd /Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/splitAli/$x 28 | # for i in *.txt; do 29 | # cat "$header" "$i" > /Users/ronggong/Desktop/tmp/xx.$$ 30 | # mv /Users/ronggong/Desktop/tmp/xx.$$ "$i" 31 | # done 32 | #done 33 | # cd ../.. 34 | # 35 | #/Applications/Praat.app/Contents/MacOS/Praat "createtextgridDan.praat" 36 | /Applications/Praat.app/Contents/MacOS/praat "createtextgridLaosheng.praat" -------------------------------------------------------------------------------- /neural_net/parameters.py: -------------------------------------------------------------------------------- 1 | fs = 44100 2 | framesize_t = 0.025 # in second 3 | hopsize_t = 0.010 4 | 5 | framesize = int(round(framesize_t * fs)) 6 | hopsize = int(round(hopsize_t * fs)) 7 | 8 | highFrequencyBound = fs/2 if fs/2 < 11000 else 11000 9 | 10 | varin = {} 11 | # parameters of viterbi 12 | varin['delta_mode'] = 'proportion' 13 | varin['delta'] = 0.35 14 | 15 | 16 | def config_select(config): 17 | if config[0] == 1 and config[1] == 0: 18 | model_name = 'single_lstm' 19 | elif config[0] == 1 and config[1] == 1: 20 | model_name = 'single_lstm_single_dense' 21 | elif config[0] == 2 and config[1] == 0: 22 | model_name = 'two_lstm' 23 | elif config[0] == 2 and config[1] == 1: 24 | model_name = 'two_lstm_single_dense' 25 | elif config[0] == 2 and config[1] == 2: 26 | model_name = 'two_lstm_two_dense' 27 | elif config[0] == 3 and config[1] == 0: 28 | model_name = 'three_lstm' 29 | elif config[0] == 3 and config[1] == 1: 30 | model_name = 'three_lstm_single_dense' 31 | elif config[0] == 3 and config[1] == 2: 32 | model_name = 'three_lstm_two_dense' 33 | elif config[0] == 3 and config[1] == 3: 34 | model_name = 'three_lstm_three_dense' 35 | else: 36 | raise ValueError 37 | 38 | return model_name -------------------------------------------------------------------------------- /neural_net/combine_feature_label.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from neural_net.file_path import * 4 | 5 | 6 | def combine_feature_label(dict_positive, dict_negative): 7 | """ 8 | Combine positive and negative features and labels into two lists 9 | :param dict_positive: 10 | :param 
dict_negative: 11 | :return: 12 | """ 13 | X = [] 14 | y = [] 15 | for key in dict_positive: 16 | X += dict_positive[key] 17 | y += [1]*len(dict_positive[key]) 18 | 19 | for key in dict_negative: 20 | X += dict_negative[key] 21 | y += [0]*len(dict_negative[key]) 22 | 23 | return X, np.array(y) 24 | 25 | 26 | if __name__ == "__main__": 27 | with open(dict_special_positive, "rb") as f: 28 | feature_special_pos = pickle.load(f) 29 | 30 | with open(dict_special_negative, "rb") as f: 31 | feature_special_neg = pickle.load(f) 32 | 33 | with open(dict_jianzi_positive, "rb") as f: 34 | feature_jianzi_pos = pickle.load(f) 35 | 36 | with open(dict_jianzi_negative, "rb") as f: 37 | feature_jianzi_neg = pickle.load(f) 38 | 39 | X_special, y_special = combine_feature_label(dict_positive=feature_special_pos, 40 | dict_negative=feature_special_neg) 41 | 42 | X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos, 43 | dict_negative=feature_jianzi_neg) 44 | 45 | print(np.count_nonzero(y_special), len(y_special)) 46 | print(np.count_nonzero(y_jianzi), len(y_jianzi)) -------------------------------------------------------------------------------- /kaldi_alignment/data/dev/reco2file_and_channel: -------------------------------------------------------------------------------- 1 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher teacher 1 2 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher teacher 1 3 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01 student_01 1 4 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02 student_02 1 5 | Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm daeh-You_He_hou-He_hou_ma_dian-qm 1 6 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm dafeh-Bi_yun_tian-Xi_xiang_ji01-qm 1 7 | Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm dafeh-Mo_lai_you-Liu_yue_xue-qm 1 8 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon 1 9 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm danbz-Qing_chen_qi-Shi_yu_zhuo-qm 1 10 | Dan_danAll_xixiangji_luanchouduo xixiangji_luanchouduo 1 11 | Dan_danAll_yutangchun_yutangchun yutangchun_yutangchun 1 12 | Dan_danAll_zhuangyuanmei_daocishi zhuangyuanmei_daocishi 1 13 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher teacher 1 14 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher teacher 1 15 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher teacher 1 16 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm lseh-Wo_ben_shi-Qiong_lin_yan-qm 1 17 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm 1 18 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm lsxp-Quan_qian_sui-Gan_lu_si-qm 1 19 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm lsxp-Shi_ye_shuo-Ding_jun_shan-qm 1 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python 
script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/adding_problem/main.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | from tcn import tcn 4 | from utils import data_generator 5 | 6 | x_train, y_train = data_generator(n=200000, seq_length=600) 7 | x_test, y_test = data_generator(n=40000, seq_length=600) 8 | 9 | 10 | class PrintSomeValues(keras.callbacks.Callback): 11 | 12 | def on_epoch_begin(self, epoch, logs={}): 13 | print(f'x_test[0:1] = {x_test[0:1]}.') 14 | print(f'y_test[0:1] = {y_test[0:1]}.') 15 | print(f'pred = {self.model.predict(x_test[0:1])}.') 16 | 17 | 18 | def run_task(): 19 | model, param_str = tcn.dilated_tcn(output_slice_index='last', 20 | num_feat=x_train.shape[2], 21 | num_classes=0, 22 | nb_filters=24, 23 | kernel_size=8, 24 | dilatations=[1, 2, 4, 8], 25 | nb_stacks=8, 26 | max_len=x_train.shape[1], 27 | activation='norm_relu', 28 | use_skip_connections=False, 29 | return_param_str=True, 30 | regression=True) 31 | 32 | print(f'x_train.shape = {x_train.shape}') 33 | print(f'y_train.shape = {y_train.shape}') 34 | 35 | psv = PrintSomeValues() 36 | 37 | # Using sparse softmax. 
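    # (added note: the sparse-softmax remark here is copied from the
    # classification examples; this adding task is trained as a regression,
    # cf. regression=True above.)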
38 | # http://chappers.github.io/web%20micro%20log/2017/01/26/quick-models-in-keras/ 39 | model.summary() 40 | 41 | model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=500, 42 | callbacks=[psv], batch_size=128) 43 | 44 | 45 | if __name__ == '__main__': 46 | run_task() 47 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/copy_memory/main.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | from utils import data_generator 4 | from tcn import tcn 5 | 6 | x_train, y_train = data_generator(601, 10, 10000) 7 | x_test, y_test = data_generator(601, 10, 2000) 8 | 9 | 10 | class PrintSomeValues(keras.callbacks.Callback): 11 | 12 | def on_epoch_begin(self, epoch, logs={}): 13 | print(f'x_test[0:1] = {x_test[0:1].flatten()}.') 14 | print(f'y_test[0:1] = {y_test[0:1].flatten()}.') 15 | print(f'p.shape = {self.model.predict(x_test[0:1]).shape}.') 16 | print(f'p(x_test[0:1]) = {self.model.predict(x_test[0:1]).argmax(axis=2).flatten()}.') 17 | 18 | 19 | def run_task(): 20 | print(sum(x_train[0].tolist(), [])) 21 | print(sum(y_train[0].tolist(), [])) 22 | 23 | model, param_str = tcn.dilated_tcn(num_feat=1, 24 | num_classes=10, 25 | nb_filters=10, 26 | kernel_size=8, 27 | dilatations=[1, 2, 4, 8], 28 | nb_stacks=8, 29 | max_len=x_train[0:1].shape[1], 30 | activation='norm_relu', 31 | use_skip_connections=False, 32 | return_param_str=True) 33 | 34 | print(f'x_train.shape = {x_train.shape}') 35 | print(f'y_train.shape = {y_train.shape}') 36 | 37 | psv = PrintSomeValues() 38 | 39 | # Using sparse softmax. 40 | # http://chappers.github.io/web%20micro%20log/2017/01/26/quick-models-in-keras/ 41 | model.summary() 42 | 43 | model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100, 44 | callbacks=[psv], batch_size=128) 45 | 46 | 47 | if __name__ == '__main__': 48 | run_task() 49 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .idea/ 6 | .DS_Store 7 | 8 | *.tsv 9 | *.tar.gz 10 | *out* 11 | credentials.json 12 | 13 | *.json 14 | 15 | nohup.out 16 | *.out 17 | *.txt 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | env/ 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest
45 | *.spec
46 | 
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 | 
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | .hypothesis/
61 | 
62 | # Translations
63 | *.mo
64 | *.pot
65 | 
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | 
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 | 
74 | # Scrapy stuff:
75 | .scrapy
76 | 
77 | # Sphinx documentation
78 | docs/_build/
79 | 
80 | # PyBuilder
81 | target/
82 | 
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 | 
86 | # pyenv
87 | .python-version
88 | 
89 | # celery beat schedule file
90 | celerybeat-schedule
91 | 
92 | # SageMath parsed files
93 | *.sage.py
94 | 
95 | # dotenv
96 | .env
97 | 
98 | # virtualenv
99 | .venv
100 | venv/
101 | ENV/
102 | 
103 | # Spyder project settings
104 | .spyderproject
105 | .spyproject
106 | 
107 | # Rope project settings
108 | .ropeproject
109 | 
110 | # mkdocs documentation
111 | /site
112 | 
113 | # mypy
114 | .mypy_cache/
115 | 
--------------------------------------------------------------------------------
/kaldi_alignment/splitAlignments.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # splitAlignments.py
 4 | #
 5 | #
 6 | # Created by Eleanor Chodroff on 3/25/15.
 7 | #
 8 | #
 9 | #
10 | import sys
11 | import csv
12 | import os
13 | 
14 | results=[]
15 | 
16 | # name = name of first text file in final_ali.txt
17 | # name_fin = name of final text file in final_ali.txt
18 | 
19 | name = "Dan_jingju_a_cappella_singing_dataset_danAll_daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon"
20 | name_fin = "Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_teacher"
21 | 
22 | try:
23 |     with open("./exp/mono_ali/final_ali.txt") as f:
24 |         next(f) #skip header
25 |         for line in f:
26 |             columns=line.split("\t")
27 |             name_prev = name
28 |             dataset = name_prev.split('_')[0]
29 |             name = columns[1]
30 |             if (name_prev != name):
31 |                 try:
32 |                     path_roletype = os.path.join('./splitAli',dataset.lower())
33 |                     if not os.path.exists(path_roletype):
34 |                         os.makedirs(path_roletype)
35 |                     with open(os.path.join('./splitAli',dataset.lower(),name_prev[len(dataset)+1:]+".txt"),'w') as fwrite:
36 |                         writer = csv.writer(fwrite)
37 |                         fwrite.write("\n".join(results))
38 |                         fwrite.close()
39 |                     #print name
40 |                 except Exception as e:
41 |                     print("Failed to write file", e)
42 |                     sys.exit(2)
43 |                 del results[:]
44 |                 results.append(line[0:-1])
45 |             else:
46 |                 results.append(line[0:-1])
47 | except Exception as e:
48 |     print("Failed to read file", e)
49 |     sys.exit(1)
50 | # this prints out the last textfile (nothing following it to compare with)
51 | try:
52 |     with open(os.path.join('./splitAli', dataset.lower(), name_prev[len(dataset)+1:]+".txt"),'w') as fwrite:
53 |         writer = csv.writer(fwrite)
54 |         fwrite.write("\n".join(results))
55 |         fwrite.close()
56 |     #print name
57 | except Exception as e:
58 |     print("Failed to write file", e)
59 |     sys.exit(2)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mispronunciation detection
 2 | Mispronunciation detection code for jingju singing voice.
 3 | 
 4 | Rong Gong's thesis "Automatic assessment of singing voice pronunciation:
 5 | a case study with jingju music" chapter 6. 
 6 | 
 7 | This repo contains two methods:
 8 | 
 9 | * Baseline - forced alignment system built on Kaldi.
10 | * Deep learning - discriminative model built using Keras and TensorFlow.
11 | 
12 | ## Baseline
13 | The main idea of the forced alignment-based mispronunciation detection is to use
14 | two lexicons respectively for the training and testing phases. The details of this
15 | idea are described in section 6.2.1 of the thesis.
16 | 
17 | Here we only explain the general pipeline of the model training and testing. Please
18 | write to the author if you want to know how to use the code for your own
19 | experiment. Pipeline:
20 | 
21 | 1. generate the language dictionary by using `srcPy/parseLang.py`.
22 | 2. generate all the files that Kaldi needs, e.g., text, wav.scp, phone.txt, by
23 | `srcPy/parseTextRepetition.py`.
24 | 3. run the model training and decode the text for the test data by `run.sh`
25 | 4. parse the decoded pronunciation by `srcPy/parse_decoded_pronunciation.py`
26 | 5. evaluate by `srcPy/mispron_eval.py`
27 | 
28 | ## Deep learning-based discriminative model
29 | 
30 | We built discriminative models for mispronunciation detection. Two types of models
31 | are built, one for special pronunciation, another for jiantuanzi syllables. We have
32 | experimented with several deep learning architectures, such as BiLSTM, CNN, attention,
33 | temporal convolutional networks (TCNs) and self-attention. The details are described in
34 | sections 6.3 and 6.4 of the thesis. Here, we also only describe the pipeline of model
35 | training and testing. Please write to the author if you want to use the code for your own
36 | experiment. Pipeline:
37 | 
38 | 1. collect the training logarithmic Mel representations by `training_sample_collection_syllable.py`
39 | 2. train various deep learning architectures by using `train_rnn_jianzi.py` or `train_rnn_special.py` respectively for
40 | jiantuanzi and special pronunciation models. e.g., the attention var can be `feedforward` or `selfatt`.
41 | 3. train TCN architectures by using `train_rnn_special_tcn.py` and `train_rnn_jianzi_tcn.py`
42 | 4. evaluate by `eval.py`
43 | 
44 | ## Contact
45 | Rong Gong: rong.gong@upf.edu
46 | 
47 | ## Code license
48 | GNU Affero General Public License 3.0
49 | 
--------------------------------------------------------------------------------
/kaldi_alignment/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | exec 5> debug_output.txt
 4 | BASH_XTRACEFD="5"
 5 | 
 6 | . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
 7 | ## This relates to the queue.
 8 | . 
./path.sh
 9 | 
10 | H=`pwd` #exp home
11 | n=2 #parallel jobs
12 | 
13 | 
14 | for x in train test; do
15 |   perl local/prepare_stm.pl data/$x || exit 1;
16 |   utils/fix_data_dir.sh data/$x || exit 1;
17 | done
18 | 
19 | #produce MFCC features
20 | rm -rf data/mfcc && mkdir -p data/mfcc && cp -R data/{train,test} data/mfcc || exit 1;
21 | for x in train test; do
22 |   #make mfcc
23 |   steps/make_mfcc.sh --nj $n --cmd "$train_cmd" data/mfcc/$x exp/make_mfcc/$x mfcc/$x || exit 1;
24 |   #compute cmvn
25 |   steps/compute_cmvn_stats.sh data/mfcc/$x exp/mfcc_cmvn/$x mfcc/$x || exit 1;
26 | done
27 | 
28 | rm -rf data/local/ data/lang && rm -f data/dict/lexiconp.txt || exit 1;
29 | 
30 | # remove the language files for test data
31 | rm -rf data/local_test/ data/lang_test && rm -f data/dict_test/lexiconp.txt || exit 1;
32 | 
33 | #lang
34 | # prepare language files for train data
35 | utils/prepare_lang.sh --sil-prob 0.0 --position_dependent_phones false data/dict "" data/local/ data/lang || exit 1;
36 | 
37 | # prepare language files for test data
38 | utils/prepare_lang.sh --sil-prob 0.0 --position_dependent_phones false data/dict_test "" data/local_test/ data/lang_test || exit 1;
39 | 
40 | # monophone
41 | steps/train_mono.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/train data/lang exp/mono || exit 1;
42 | 
43 | #monophone_ali train
44 | steps/align_si.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/train data/lang exp/mono exp/mono_ali || exit 1;
45 | 
46 | # word level alignment
47 | steps/get_train_ctm.sh data/mfcc/train data/lang exp/mono_ali || exit 1;
48 | 
49 | for n in `seq $n`; do gunzip -k -f exp/mono_ali/ctm.$n.gz; done || exit 1;
50 | 
51 | . ./ali2Phones.sh
52 | 
53 | #monophone_ali test
54 | steps/align_si.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/test data/lang_test exp/mono exp/mono_test_ali || exit 1;
55 | 
56 | # word level alignment
57 | steps/get_train_ctm.sh data/mfcc/test data/lang_test exp/mono_test_ali || exit 1;
58 | 
59 | for n in `seq $n`; do gunzip -k -f exp/mono_test_ali/ctm.$n.gz; done || exit 1;
60 | 
61 | steps/get_prons.sh data/mfcc/test data/lang_test exp/mono_test_ali
62 | 
63 | gunzip -c exp/mono_test_ali/prons.*.gz | utils/sym2int.pl -f 4 data/lang_test/words.txt | utils/sym2int.pl -f 5- data/lang_test/phones.txt
64 | 
--------------------------------------------------------------------------------
/neural_net/training_scripts/models_TCN.py:
--------------------------------------------------------------------------------
 1 | from neural_net.keras_tcn.tcn import tcn
 2 | from neural_net.training_scripts.generator import generator_batch1
 3 | from keras.callbacks import EarlyStopping
 4 | from keras.callbacks import CSVLogger
 5 | from keras.callbacks import ModelCheckpoint
 6 | 
 7 | 
 8 | def train_TCN_batch(list_feature_fold_train,
 9 |                     labels_fold_train,
10 |                     list_feature_fold_val,
11 |                     labels_fold_val,
12 |                     batch_size,
13 |                     input_shape,
14 |                     file_path_model,
15 |                     filename_log,
16 |                     epoch,
17 |                     patience,
18 |                     scaler,
19 |                     dropout,
20 |                     summ=False,
21 |                     verbose=2):
22 | 
23 |     model, param_str = tcn.dilated_tcn(output_slice_index='last',  # try 'first'.
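                                       # (added note) 'last' keeps only the final
                                       # time step, so each variable-length syllable
                                       # sequence from the batch-size-1 generator below
                                       # yields a single binary (num_classes=2)
                                       # mispronunciation decision.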
24 | num_feat=input_shape[-1], 25 | num_classes=2, 26 | nb_filters=16, 27 | kernel_size=3, 28 | dilatations=[0, 1, 3, 5], 29 | nb_stacks=1, 30 | max_len=None, 31 | dropout=dropout, 32 | activation='norm_relu', 33 | use_skip_connections=False, 34 | return_param_str=True) 35 | 36 | if summ: 37 | model.summary() 38 | 39 | callbacks = [ModelCheckpoint(file_path_model, monitor='val_loss', verbose=0, save_best_only=True), 40 | EarlyStopping(monitor='val_loss', patience=patience, verbose=0), 41 | CSVLogger(filename=filename_log, separator=';')] 42 | 43 | print("start training with validation...") 44 | 45 | generator_train = generator_batch1(list_feature=list_feature_fold_train, 46 | labels=labels_fold_train, 47 | scaler=scaler) 48 | 49 | generator_val = generator_batch1(list_feature=list_feature_fold_val, 50 | labels=labels_fold_val, 51 | scaler=scaler) 52 | 53 | model.fit_generator(generator=generator_train, 54 | steps_per_epoch=len(list_feature_fold_train)/batch_size, 55 | validation_data=generator_val, 56 | validation_steps=len(list_feature_fold_val)/batch_size, 57 | callbacks=callbacks, 58 | epochs=epoch, 59 | verbose=verbose) 60 | 61 | return model -------------------------------------------------------------------------------- /neural_net/keras_tcn/mnist_pixel/main.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | 3 | from utils import data_generator 4 | from tcn import tcn 5 | 6 | 7 | def get_activations(model, model_inputs, print_shape_only=False, layer_name=None): 8 | print('----- activations -----') 9 | activations = [] 10 | inp = model.input 11 | 12 | model_multi_inputs_cond = True 13 | if not isinstance(inp, list): 14 | # only one input! let's wrap it in a list. 15 | inp = [inp] 16 | model_multi_inputs_cond = False 17 | 18 | outputs = [layer.output for layer in model.layers if 19 | layer.name == layer_name or layer_name is None] # all layer outputs 20 | 21 | funcs = [K.function(inp + [K.learning_phase()], [out]) for out in outputs] # evaluation functions 22 | 23 | if model_multi_inputs_cond: 24 | list_inputs = [] 25 | list_inputs.extend(model_inputs) 26 | list_inputs.append(0.) 27 | else: 28 | list_inputs = [model_inputs, 0.] 29 | 30 | # Learning phase. 0 = Test mode (no dropout or batch normalization) 31 | # layer_outputs = [func([model_inputs, 0.])[0] for func in funcs] 32 | layer_outputs = [func(list_inputs)[0] for func in funcs] 33 | for layer_activations in layer_outputs: 34 | activations.append(layer_activations) 35 | if print_shape_only: 36 | print(layer_activations.shape) 37 | else: 38 | print(layer_activations) 39 | return activations 40 | # np.sum(activations[15].squeeze(), axis=1) 41 | 42 | 43 | def run_task(): 44 | (x_train, y_train), (x_test, y_test) = data_generator() 45 | 46 | model, param_str = tcn.dilated_tcn(output_slice_index='last', # try 'first'. 
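                                       # (added note) with 'last', the TCN reads the
                                       # whole 784-step pixel sequence and emits one
                                       # 10-way prediction at the final step.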
47 | num_feat=1, 48 | num_classes=10, 49 | nb_filters=64, 50 | kernel_size=8, 51 | dilatations=[1, 2, 4, 8], 52 | nb_stacks=8, 53 | max_len=x_train[0:1].shape[1], 54 | activation='norm_relu', 55 | use_skip_connections=False, 56 | return_param_str=True) 57 | 58 | print(f'x_train.shape = {x_train.shape}') 59 | print(f'y_train.shape = {y_train.shape}') 60 | print(f'x_test.shape = {x_test.shape}') 61 | print(f'y_test.shape = {y_test.shape}') 62 | 63 | model.summary() 64 | 65 | # a = np.zeros_like(x_train[0:1]) 66 | # a[:, 0, :] = 1.0 67 | # print(get_activations(model, a)) 68 | 69 | model.fit(x_train, y_train.squeeze().argmax(axis=1), epochs=100, 70 | validation_data=(x_test, y_test.squeeze().argmax(axis=1))) 71 | 72 | 73 | if __name__ == '__main__': 74 | run_task() 75 | -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/parse_decoded_pronunciation.py: -------------------------------------------------------------------------------- 1 | """ 2 | write the decoded text for test set 3 | """ 4 | import json 5 | from kaldi_alignment.srcPy.filePath import * 6 | 7 | 8 | def open_decoded_pronunciation(filename): 9 | utt = [] 10 | with open(filename) as file: 11 | for row in file.readlines(): 12 | utt.append(row.replace('\t', '').replace('\t\n', '')) 13 | return utt 14 | 15 | 16 | def parse_lexicon_to_list(lexicon): 17 | list_lexicon = [] 18 | with open(lexicon) as file: 19 | for row in file.readlines(): 20 | row = row.replace('\n', '') 21 | list_lexicon.append([row.split(' ')[0], row.split(' ')[1:]]) 22 | return list_lexicon 23 | 24 | 25 | def lexicon_finder(dict_lexicon_organized, pho_list): 26 | """ 27 | find the corresponding pho_list in lexicon organized 28 | """ 29 | for syl_organized, dict_pho_list in dict_lexicon_organized.items(): 30 | if pho_list == dict_pho_list: 31 | return syl_organized 32 | 33 | raise ValueError("Not found word entry for {}".format(pho_list)) 34 | 35 | 36 | if __name__ == "__main__": 37 | 38 | path_test_ali = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_test_ali/" 39 | path_lang_test = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/dict_test" 40 | filename_decoded_pronunciation = os.path.join(path_test_ali, "pron_perutt_nowb.txt") 41 | 42 | list_lexicon = parse_lexicon_to_list(os.path.join(path_lang_test, 'lexicon.txt')) 43 | 44 | with open(os.path.join(path_lang, "dict_lexicon_repetition_syllable_special.json"), "r") as read_file: 45 | dict_lexicon = json.load(read_file) 46 | 47 | utts = open_decoded_pronunciation(filename_decoded_pronunciation) 48 | 49 | with open(os.path.join(path_test_ali, 'text_decoded'), "w") as f: 50 | for utt in utts: 51 | utt_list = utt.split('\t') 52 | utt_organized = [utt_list[0]] 53 | 54 | for pho_list in utt_list[1:]: 55 | if pho_list != 'SIL sil': 56 | # find all the pronunciations for the syl in repetitive lexicon 57 | syl = pho_list.split(' ')[0] 58 | pron_decoded = pho_list.split(' ')[1:] 59 | 60 | # gather all the special pronunciation for the syl 61 | list_syllable_special_unit = [] 62 | for special, pron_syl in dict_lexicon.items(): 63 | if syl in pron_syl[1]: 64 | list_syllable_special_unit.append([special, pron_syl[0]]) 65 | 66 | # match the special pronunciation 67 | for special, pron in list_syllable_special_unit: 68 | if pron_decoded == pron: 69 | utt_organized.append(''.join([i for i in special if not i.isdigit()])) 70 | break 71 | f.write(' '.join(utt_organized)+'\n') 72 | 
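# (added note) dict_lexicon maps a special-pronunciation entry to a pair
# [phone_list, syllable_list]; a hypothetical entry could look like
# {"jian1": [["c", "i", "E", "n"], ["jian"]]}, and the decoded phones are
# matched against phone_list to recover the special pronunciation label.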
-------------------------------------------------------------------------------- /neural_net/data/mispronunciation_filelist_test.csv: -------------------------------------------------------------------------------- 1 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student01,, 2 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student02,, 3 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student03,, 4 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student04,, 5 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student01,, 6 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student02,, 7 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student03,, 8 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student04,, 9 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student05,, 10 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student06,, 11 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student07,, 12 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_01,, 13 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_02,, 14 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_03,, 15 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_04,, 16 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_05,, 17 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_02,, 18 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_01,, 19 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_02,, 20 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_03,, 21 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_04,, 22 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_05,, 23 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_06,, 24 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_01,, 25 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_02,, 26 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_04_mentougou,, 27 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_01,, 28 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_02,, 29 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_03,, 30 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_dxjky,, 31 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_sizhu,, 32 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_02_dxjky,, 33 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_03_dxjky,, 34 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_04_dxjky,, 35 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_05_dxjky,, 36 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_06_mentougou,, -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/mispronunciation_filelist_test.csv: -------------------------------------------------------------------------------- 1 | 
part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student01,, 2 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student02,, 3 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student03,, 4 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student04,, 5 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student01,, 6 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student02,, 7 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student03,, 8 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student04,, 9 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student05,, 10 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student06,, 11 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student07,, 12 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_01,, 13 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_02,, 14 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_03,, 15 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_04,, 16 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_05,, 17 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_02,, 18 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_01,, 19 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_02,, 20 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_03,, 21 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_04,, 22 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_05,, 23 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_06,, 24 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_01,, 25 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_02,, 26 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_04_mentougou,, 27 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_01,, 28 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_02,, 29 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_03,, 30 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_dxjky,, 31 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_sizhu,, 32 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_02_dxjky,, 33 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_03_dxjky,, 34 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_04_dxjky,, 35 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_05_dxjky,, 36 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_06_mentougou,, -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/filePath.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | from kaldi_alignment.srcPy.textgridParser import syllableTextgridExtraction 4 | from kaldi_alignment.srcPy.csv_prepossessing import open_csv_recordings 5 | 6 | path_root = 
'/Users/ronggong/Documents_using/MTG_document/Jingju_arias/' 7 | 8 | path_nacta = 'jingju_a_cappella_singing_dataset' 9 | path_nacta2017 = 'jingju_a_cappella_singing_dataset_extended_nacta2017' 10 | path_primary = 'primary_school_recording' 11 | 12 | dataset_laosheng = 'qmLonUpf/laosheng' 13 | dataset_ss = 'sourceSeparation' 14 | 15 | path_data_train = '../data/train' 16 | path_data_dev = '../data/dev' 17 | path_data_test = '../data/test' 18 | path_data_LM = '../data/LM' 19 | 20 | path_lang = '../data/dict' 21 | 22 | recordings_train = open_csv_recordings("mispronunciation_filelist_train.csv") 23 | recordings_test = open_csv_recordings("mispronunciation_filelist_test.csv") 24 | 25 | 26 | def getRecordings(wav_path): 27 | recordings = [] 28 | for root, subFolders, files in os.walk(wav_path): 29 | for f in files: 30 | file_prefix, file_extension = os.path.splitext(f) 31 | if file_prefix != '.DS_Store': 32 | recordings.append(file_prefix) 33 | 34 | return recordings 35 | 36 | 37 | def parse_recordings(rec): 38 | if rec[0] == "part1": 39 | data_path = path_nacta 40 | sub_folder = rec[2] 41 | textgrid_folder = "textgrid" 42 | wav_folder = "wav_left" 43 | syllable_tier = "dian" 44 | if rec[3][:2] == 'da': 45 | roletype = 'Dan' 46 | elif rec[3][:2] == 'ls': 47 | roletype = 'Laosheng' 48 | else: 49 | raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec)) 50 | elif rec[0] == "part2": 51 | data_path = path_nacta2017 52 | sub_folder = rec[2] 53 | textgrid_folder = "textgridDetails" 54 | wav_folder = "wav" 55 | syllable_tier = "dianSilence" 56 | if rec[3][:2] == 'da': 57 | roletype = 'Dan' 58 | elif rec[3][:2] == 'ls': 59 | roletype = 'Laosheng' 60 | else: 61 | raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec)) 62 | else: 63 | data_path = path_primary 64 | sub_folder = rec[1] + "/" + rec[2] 65 | textgrid_folder = "textgrid" 66 | wav_folder = "wav_left" 67 | syllable_tier = "dianSilence" 68 | if rec[2][:2] == 'da': 69 | roletype = 'Dan' 70 | elif rec[2][:2] == 'ls': 71 | roletype = 'Laosheng' 72 | else: 73 | raise ValueError("Not exist a role-type {} for file {}".format(rec[2][:2], rec)) 74 | 75 | filename = rec[3] 76 | line_tier = "line" 77 | longsyllable_tier = "longsyllable" 78 | phoneme_tier = "details" 79 | special_tier = "special" 80 | special_class_tier = "specialClass" 81 | 82 | return data_path, sub_folder, textgrid_folder, \ 83 | wav_folder, filename, line_tier, \ 84 | longsyllable_tier, syllable_tier, phoneme_tier, \ 85 | special_tier, special_class_tier, roletype 86 | 87 | 88 | if __name__ == '__main__': 89 | for fn in getRecordings(os.path.join(path_root, path_nacta, 'textgrid', 'laosheng')): 90 | print('[\'laosheng\', \''+fn+'\'],') -------------------------------------------------------------------------------- /neural_net/onsetSegmentEval/evaluation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Syllable segmentation evaluation: landmark and boundary evaluations 5 | Only evaluate boundary onset 6 | 7 | [1] A new hybrid approach for automatic speech signal segmentation 8 | using silence signal detection, energy convex hull, and spectral variation 9 | 10 | [2] Syll-O-Matic: An adaptive time-frequency representation 11 | for the automatic segmentation of speech into syllables 12 | 13 | [3] EVALUATION FRAMEWORK FOR AUTOMATIC SINGING 14 | TRANSCRIPTION 15 | """ 16 | 17 | from neural_net.onsetSegmentEval.phonemeMap import misMatchIgnorePhn 18 | from 
neural_net.onsetSegmentEval.phonemeMap import misMatchIgnoreSyl 19 | from neural_net.parameters import hopsize_t 20 | import numpy as np 21 | 22 | 23 | def onsetEval(groundtruthOnsets, detectedOnsets, tolerance, label): 24 | """ 25 | :param groundtruthOnsets: [[onset time, onset label], ...] 26 | :param detectedOnsets: [[onset time, onset label], ...] 27 | :param tolerance: 0.025 or 0.05 28 | :param label: True or False, if we want to evaluate the label 29 | :return: 30 | """ 31 | 32 | numDetectedOnsets = len(detectedOnsets) 33 | numGroundtruthOnsets = len(groundtruthOnsets) 34 | 35 | onsetCorrectlist = [0]*numDetectedOnsets 36 | 37 | for gtb in groundtruthOnsets: 38 | for idx, db in enumerate(detectedOnsets): 39 | onsetTh = tolerance # onset threshold 40 | 41 | if abs(db[0]-gtb[0]) < onsetTh: -------------------------------------------------------------------------------- /kaldi_alignment/createtextgridDan.praat: -------------------------------------------------------------------------------- 101 | if start > 0 & startnextutt = 0 102 | Insert boundary... 1 start 103 | Set interval text... 1 int+1 'phone$' 104 | Insert boundary... 1 end 105 | elsif start = 0 106 | Set interval text... 1 int 'phone$' 107 | elsif start > 0 108 | Insert boundary... 1 start 109 | Set interval text... 1 int+1 'phone$' 110 | endif 111 | #pause 112 | endfor 113 | #pause 114 | Write to text file... 'dir$'/'basename$'.TextGrid 115 | select Table times 116 | plus Sound 'filename_noext$' 117 | plus TextGrid 'filename_noext$' 118 | Remove 119 | endfor -------------------------------------------------------------------------------- /kaldi_alignment/createtextgridLaosheng.praat: -------------------------------------------------------------------------------- 1 | # Created 3-27-15 Eleanor Chodroff 2 | 3 | wavDir$ = "/Users/ronggong/Documents_using/MTG_document/Jingju_arias" 4 | dir$ = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/splitAli/laosheng" 5 | 6 | Create Strings as file list... list_txt 'dir$'/*.txt 7 | nFiles = Get number of strings 8 | 9 | for i from 1 to nFiles 10 | 11 | select Strings list_txt 12 | filename$ = Get string... i 13 | basename$ = filename$ - ".txt" 14 | txtname$ = filename$ - ".txt" 15 | 16 | if startsWith (basename$, "primary_school_recording") 17 | 18 | rind_2017_start = rindex (basename$, "2017") 19 | 20 | subfoldername_len = length (basename$) - rind_2017_start + 1 21 | # pathname$ = left$ (basename$, rind - 2) 22 | subfoldername$ = right$ (basename$, subfoldername_len) 23 | artistname$ = left$ (subfoldername$, index (subfoldername$, "_") - 1) 24 | subfoldername2$ = right$ (subfoldername$, length (subfoldername$) - length (artistname$) - 1) 25 | 26 | rind_t = rindex (subfoldername2$, "teacher") 27 | rind_s = rindex (subfoldername2$, "student") 28 | if rind_t 29 | rind = rind_t 30 | else 31 | rind = rind_s 32 | endif 33 | filename_len = length (subfoldername2$) - rind + 1 34 | filename_noext$ = right$ (subfoldername2$, filename_len) 35 | 36 | subfoldername2$ = left$ (subfoldername2$, length (subfoldername2$) - filename_len - 1) 37 | 38 | #writeInfoLine: basename$ 39 | #writeInfoLine: pathname$ 40 | #writeInfoLine: subfoldername$ 41 | #writeInfoLine: artistname$ 42 | #writeInfoLine: subfoldername2$ 43 | #writeInfoLine: filename_noext$ 44 | 45 | Read from file... 
'wavDir$'/primary_school_recording/wav_left/'artistname$'/'subfoldername2$'/'filename_noext$'.wav 46 | else 47 | rind_dan = rindex (basename$, "danAll") 48 | rind_laosheng = rindex (basename$, "laosheng") 49 | if rind_dan 50 | rind_roletype_start = rind_dan 51 | rind_roletype_end = rind_dan + 6 52 | roletype$ = "danAll" 53 | else 54 | rind_roletype_start = rind_laosheng 55 | rind_roletype_end = rind_laosheng + 8 56 | roletype$ = "laosheng" 57 | endif 58 | 59 | filename_len = length (basename$) - rind_roletype_end 60 | #path_roletype_name = left$ (basename$, rind_roletype_start) 61 | pathname$ = left$ (basename$, rind_roletype_start - 2) 62 | filename_noext$ = right$ (basename$, filename_len) 63 | 64 | #writeInfoLine: rind_roletype_start 65 | #writeInfoLine: rind_roletype_end 66 | #writeInfoLine: basename$ 67 | #writeInfoLine: roletype$ 68 | #writeInfoLine: filename_noext$ 69 | 70 | Read from file... 'wavDir$'/jingju_a_cappella_singing_dataset/wav_left/'roletype$'/'filename_noext$'.wav 71 | endif 72 | 73 | dur = Get total duration 74 | To TextGrid... "kaldiphone" 75 | 76 | #pause 'txtname$' 77 | 78 | select Strings list_txt 79 | Read Table from tab-separated file... 'dir$'/'txtname$'.txt 80 | Rename... times 81 | nRows = Get number of rows 82 | Sort rows... start 83 | for j from 1 to nRows 84 | select Table times 85 | startutt_col$ = Get column label... 5 86 | start_col$ = Get column label... 10 87 | dur_col$ = Get column label... 6 88 | phone_col$ = Get column label... 7 89 | if j < nRows 90 | startnextutt = Get value... j+1 'startutt_col$' 91 | else 92 | startnextutt = 0 93 | endif 94 | start = Get value... j 'start_col$' 95 | phone$ = Get value... j 'phone_col$' 96 | dur = Get value... j 'dur_col$' 97 | end = start + dur 98 | select TextGrid 'filename_noext$' 99 | int = Get interval at time... 1 start+0.005 100 | if start > 0 & startnextutt = 0 101 | Insert boundary... 1 start 102 | Set interval text... 1 int+1 'phone$' 103 | Insert boundary... 1 end 104 | elsif start = 0 105 | Set interval text... 1 int 'phone$' 106 | elsif start > 0 107 | Insert boundary... 1 start 108 | Set interval text... 1 int+1 'phone$' 109 | endif 110 | #pause 111 | endfor 112 | #pause 113 | Write to text file... 
'dir$'/'basename$'.TextGrid 114 | select Table times 115 | plus Sound 'filename_noext$' 116 | plus TextGrid 'filename_noext$' 117 | Remove 118 | endfor -------------------------------------------------------------------------------- /neural_net/file_path.py: -------------------------------------------------------------------------------- 1 | import os 2 | from neural_net.utils.csv_preprocessing import open_csv_recordings 3 | 4 | dir_path = os.path.dirname(os.path.realpath(__file__)) 5 | 6 | path_root = '/Users/ronggong/Documents_using/MTG_document/Jingju_arias/' 7 | 8 | path_nacta = 'jingju_a_cappella_singing_dataset' 9 | path_nacta2017 = 'jingju_a_cappella_singing_dataset_extended_nacta2017' 10 | path_primary = 'primary_school_recording' 11 | 12 | recordings_train = open_csv_recordings(os.path.join(dir_path, "data/mispronunciation_filelist_train.csv")) 13 | recordings_test = open_csv_recordings(os.path.join(dir_path, "data/mispronunciation_filelist_test.csv")) 14 | 15 | filename_normal_special = os.path.join(dir_path, "data/normal_special.json") 16 | filename_normal_jianzi = os.path.join(dir_path, "data/normal_jianzi.json") 17 | 18 | dict_special_positive = os.path.join(dir_path, "data/special_positive.pkl") 19 | dict_special_negative = os.path.join(dir_path, "data/special_negative.pkl") 20 | dict_jianzi_positive = os.path.join(dir_path, "data/jianzi_positive.pkl") 21 | dict_jianzi_negative = os.path.join(dir_path, "data/jianzi_negative.pkl") 22 | 23 | joint_cnn_model_path = os.path.join(dir_path, 'model', 'segmentation') 24 | 25 | filename_special_model = os.path.join(dir_path, "model", "special_model_prod_True_True_0.5.h5") 26 | filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_True_True_0.5.h5") 27 | # filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_feedforward_True_0.5.h5") 28 | 29 | # filename_special_model = os.path.join(dir_path, "model", "special_model_prod_tcn_0.05.h5") 30 | # filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_tcn_0.05.h5") 31 | 32 | filename_result_decoded_mispronunciaiton = os.path.join(dir_path, "results", "text_decoded_special_True_True_0.5") 33 | # filename_result_decoded_mispronunciaiton = os.path.join(dir_path, "results", "text_decoded_special_feedforward_True_0.5") 34 | 35 | path_figs_jianzi = "/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/figs/jianzi" 36 | 37 | 38 | def getRecordings(wav_path): 39 | recordings = [] 40 | for root, subFolders, files in os.walk(wav_path): 41 | for f in files: 42 | file_prefix, file_extension = os.path.splitext(f) 43 | if file_prefix != '.DS_Store': 44 | recordings.append(file_prefix) 45 | 46 | return recordings 47 | 48 | 49 | def parse_recordings(rec): 50 | if rec[0] == "part1": 51 | data_path = path_nacta 52 | sub_folder = rec[2] 53 | textgrid_folder = "textgrid" 54 | wav_folder = "wav_left" 55 | syllable_tier = "dian" 56 | if rec[3][:2] == 'da': 57 | roletype = 'Dan' 58 | elif rec[3][:2] == 'ls': 59 | roletype = 'Laosheng' 60 | else: 61 | raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec)) 62 | elif rec[0] == "part2": 63 | data_path = path_nacta2017 64 | sub_folder = rec[2] 65 | textgrid_folder = "textgridDetails" 66 | wav_folder = "wav" 67 | syllable_tier = "dianSilence" 68 | if rec[3][:2] == 'da': 69 | roletype = 'Dan' 70 | elif rec[3][:2] == 'ls': 71 | roletype = 'Laosheng' 72 | else: 73 | raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec)) 74 | else: 75 | 
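# part3 recordings come from the primary school dataset: rec[1] holds the
# session folder and rec[2] the aria folder, matching the columns of the
# mispronunciation filelist CSVs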
data_path = path_primary 76 | sub_folder = rec[1] + "/" + rec[2] 77 | textgrid_folder = "textgrid" 78 | wav_folder = "wav_left" 79 | syllable_tier = "dianSilence" 80 | if rec[2][:2] == 'da': 81 | roletype = 'Dan' 82 | elif rec[2][:2] == 'ls': 83 | roletype = 'Laosheng' 84 | else: 85 | raise ValueError("Not exist a role-type {} for file {}".format(rec[2][:2], rec)) 86 | 87 | filename = rec[3] 88 | line_tier = "line" 89 | longsyllable_tier = "longsyllable" 90 | phoneme_tier = "details" 91 | special_tier = "special" 92 | special_class_tier = "specialClass" 93 | 94 | return data_path, sub_folder, textgrid_folder, \ 95 | wav_folder, filename, line_tier, \ 96 | longsyllable_tier, syllable_tier, phoneme_tier, \ 97 | special_tier, special_class_tier, roletype -------------------------------------------------------------------------------- /kaldi_alignment/data/test/reco2file_and_channel: -------------------------------------------------------------------------------- 1 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student01 student01 1 2 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student02 student02 1 3 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student03 student03 1 4 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student04 student04 1 5 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student01 student01 1 6 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student02 student02 1 7 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student03 student03 1 8 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student04 student04 1 9 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student05 student05 1 10 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student06 student06 1 11 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student07 student07 1 12 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_01 student_01 1 13 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_02 student_02 1 14 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_03 student_03 1 15 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_04 student_04 1 16 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_05 student_05 1 17 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_02 student_02 1 18 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_01 student_01 1 19 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_02 student_02 1 20 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_03 student_03 1 21 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_04 student_04 1 22 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_05 
student_05 1 23 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_06 student_06 1 24 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_dxjky student_01_dxjky 1 25 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_sizhu student_01_sizhu 1 26 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_02_dxjky student_02_dxjky 1 27 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_03_dxjky student_03_dxjky 1 28 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_04_dxjky student_04_dxjky 1 29 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_05_dxjky student_05_dxjky 1 30 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_06_mentougou student_06_mentougou 1 31 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_01 student_01 1 32 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_02 student_02 1 33 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_04_mentougou student_04_mentougou 1 34 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_01 student_01 1 35 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_02 student_02 1 36 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_03 student_03 1 37 | -------------------------------------------------------------------------------- /neural_net/utils/audio_preprocessing.py: -------------------------------------------------------------------------------- 1 | from madmom.processors import SequentialProcessor 2 | import numpy as np 3 | 4 | EPSILON = np.spacing(1) 5 | 6 | 7 | def Fprev_sub(x,w=2): 8 | """ 9 | # D = prev_sub(X,W) calculate the shifted x, with shifting frames 2 10 | input feature*frame 11 | """ 12 | # pad data by repeating first and last columns 13 | if w > 0: 14 | # shift to right 15 | xx = np.hstack((np.tile(x[:,0], (w,1)).transpose(), x[:,:-w])) 16 | if w < 0: 17 | # shift to left 18 | xx = np.hstack((x[:,-w:], np.tile(x[:,-1], (-w,1)).transpose())) 19 | if w==0: 20 | raise ValueError("shifting frame coef can't be 0.") 21 | 22 | # plt.figure() 23 | # plt.pcolormesh(xx) 24 | # plt.show() 25 | 26 | return xx 27 | 28 | 29 | def _nbf_2D(log_mel, nlen): 30 | """shift the feature and concatenate it in both left and right sides for nlen""" 31 | 32 | log_mel = np.array(log_mel).transpose() 33 | log_mel_out = np.array(log_mel, copy=True) 34 | for ii in range(1, nlen + 1): 35 | log_mel_right_shift = Fprev_sub(log_mel, w=ii) 36 | log_mel_left_shift = Fprev_sub(log_mel, w=-ii) 37 | log_mel_out = np.vstack((log_mel_right_shift, log_mel_out, log_mel_left_shift)) 38 | feature = log_mel_out.transpose() 39 | return feature 40 | 41 | 42 | class MadmomMelbankProcessor(SequentialProcessor): 43 | 44 | def __init__(self, fs, hopsize_t): 45 | from madmom.audio.signal import SignalProcessor, FramedSignalProcessor 46 | from madmom.audio.stft import ShortTimeFourierTransformProcessor 47 | from madmom.audio.filters import MelFilterbank 48 | 
from madmom.audio.spectrogram import (FilteredSpectrogramProcessor, 49 | LogarithmicSpectrogramProcessor) 50 | 51 | # define pre-processing chain 52 | sig = SignalProcessor(num_channels=1, sample_rate=fs) 53 | frames = FramedSignalProcessor(frame_size=2048, hopsize=int(fs*hopsize_t)) 54 | stft = ShortTimeFourierTransformProcessor() # caching FFT window 55 | filt = FilteredSpectrogramProcessor( 56 | filterbank=MelFilterbank, num_bands=80, fmin=27.5, fmax=16000, 57 | norm_filters=True, unique_filters=False) 58 | spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON) 59 | 60 | single = SequentialProcessor([frames, stft, filt, spec]) 61 | 62 | pre_processor = SequentialProcessor([sig, single]) 63 | 64 | super(MadmomMelbankProcessor, self).__init__([pre_processor]) 65 | 66 | 67 | def get_log_mel_madmom(audio_fn, fs, hopsize_t, channel, context=False): 68 | """ 69 | calculate log mel feature by madmom 70 | :param audio_fn: 71 | :param fs: 72 | :param hopsize_t: 73 | :param channel: 74 | :return: 75 | """ 76 | madmomMelbankProc = MadmomMelbankProcessor(fs, hopsize_t) 77 | mfcc = madmomMelbankProc(audio_fn) 78 | 79 | if context: 80 | if channel == 1: 81 | mfcc = _nbf_2D(mfcc, 7) 82 | else: 83 | mfcc_conc = [] 84 | for ii in range(3): 85 | mfcc_conc.append(_nbf_2D(mfcc[:,:,ii], 7)) 86 | mfcc = np.stack(mfcc_conc, axis=2) 87 | 88 | return mfcc 89 | 90 | 91 | def feature_reshape(feature, nlen=10): 92 | """ 93 | reshape mfccBands feature into n_sample * n_row * n_col 94 | :param feature: 95 | :param nlen: 96 | :return: 97 | """ 98 | 99 | n_sample = feature.shape[0] 100 | n_row = 80 101 | n_col = nlen*2+1 102 | 103 | feature_reshaped = np.zeros((n_sample,n_row,n_col),dtype='float32') 104 | # print("reshaping feature...") 105 | for ii in range(n_sample): 106 | # print ii 107 | feature_frame = np.zeros((n_row,n_col),dtype='float32') 108 | for jj in range(n_col): 109 | feature_frame[:,jj] = feature[ii][n_row*jj:n_row*(jj+1)] 110 | feature_reshaped[ii,:,:] = feature_frame 111 | return feature_reshaped 112 | 113 | 114 | def segmentMfccLine(line, hopsize_t, mfccs): 115 | """ 116 | segment line level mfccs 117 | :param line: [start_time, end_time, lyrics] 118 | :return: 119 | """ 120 | # start and end time 121 | time_start = line[0] 122 | time_end = line[1] 123 | frame_start = int(round(time_start / hopsize_t)) 124 | frame_end = int(round(time_end / hopsize_t)) 125 | 126 | # log_mel_reshape line 127 | mfccs_line = mfccs[:, frame_start: frame_end] 128 | return mfccs_line -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/textgridParser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 4 | import kaldi_alignment.srcPy.textgrid as tgp 5 | 6 | 7 | def textGrid2WordList(textgrid_file, whichTier = 'pinyin', utf16 = True): 8 | ''' 9 | parse textGrid into a python list of tokens 10 | @param whichTier : 'pinyin' default tier name 11 | ''' 12 | if not os.path.isfile(textgrid_file): raise Exception("file {} not found".format(textgrid_file)) 13 | beginTsAndWordList = [] 14 | 15 | if utf16: 16 | par_obj = tgp.TextGrid.loadUTF16(textgrid_file) #loading the object 17 | else: 18 | par_obj = tgp.TextGrid.load(textgrid_file) #loading the object 19 | 20 | tiers= tgp.TextGrid._find_tiers(par_obj) #finding existing tiers 21 | 22 | isTierFound = False 23 | for tier in tiers: 24 | tierName= tier.tier_name().replace('.','') 25 | #iterating over tiers and selecting the one specified 26 | if 
tierName == whichTier: 27 | isTierFound = True 28 | #this function parse the file nicely and return cool tuples 29 | tier_details = tier.make_simple_transcript() 30 | 31 | for line in tier_details: 32 | beginTsAndWordList.append([float(line[0]), float(line[1]), line[2]]) 33 | 34 | if not isTierFound: 35 | print('Missing tier {1} in file {0}' .format(textgrid_file, whichTier)) 36 | 37 | return beginTsAndWordList, isTierFound 38 | 39 | 40 | def line2WordList(line, entireWordList): 41 | ''' 42 | find the nested wordList of entireWordList by line tuple 43 | :param line: line tuple [startTime, endTime, string] 44 | :param entireWordList: entire word list 45 | :return: nested wordList 46 | ''' 47 | nestedWordList = [] 48 | vault = False 49 | for wordlist in entireWordList: 50 | # the ending of the line 51 | if wordlist[1] == line[1]: 52 | nestedWordList.append(wordlist) 53 | break 54 | # the beginning of the line 55 | if wordlist[0] == line[0]: 56 | vault = True 57 | if vault == True: 58 | nestedWordList.append(wordlist) 59 | 60 | return nestedWordList 61 | 62 | 63 | def wordListsParseByLines(entireLine, entireWordList): 64 | ''' 65 | find the wordList for each line, cut the word list according to line 66 | :param entireLine: entire lines in line tier 67 | :param entirewWordList: entire word lists in pinyin tier 68 | :return: 69 | nestedWordLists: [[line0, wordList0], [line1, wordList1], ...] 70 | numLines: sum of number of lines 71 | numWords: sum of number of words 72 | ''' 73 | nestedWordLists = [] 74 | numLines = 0 75 | numWords = 0 76 | 77 | for line in entireLine: 78 | # asciiLine=line[2].encode("ascii", "replace") 79 | asciiLine = line[2] 80 | if len(asciiLine.replace(" ", "")): # if line is not empty 81 | numLines += 1 82 | nestedWordList = [] 83 | wordList = line2WordList(line, entireWordList) 84 | for word in wordList: 85 | # asciiWord = word[2].encode("ascii", "replace") 86 | asciiWord = word[2] 87 | # if len(asciiWord.replace(" ","")): # if word is not empty 88 | numWords += 1 89 | nestedWordList.append(word) 90 | nestedWordLists.append([line,nestedWordList]) 91 | 92 | return nestedWordLists, numLines, numWords 93 | 94 | 95 | def syllableTextgridExtraction(textgrid_path, recording, tier0, tier1): 96 | 97 | ''' 98 | Extract syllable boundary and phoneme boundary from textgrid 99 | :param textgrid_path: 100 | :param recording: 101 | :param tier0: parent tier 102 | :param tier1: child tier which should be covered by parent tier 103 | :return: 104 | nestedPhonemeList, element[0] - syllable, element[1] - a list containing the phoneme of the syllable 105 | ''' 106 | 107 | print(textgrid_path, recording) 108 | textgrid_file = os.path.join(textgrid_path, recording+'.TextGrid') 109 | 110 | syllableList, _ = textGrid2WordList(textgrid_file, whichTier=tier0) 111 | phonemeList, _ = textGrid2WordList(textgrid_file, whichTier=tier1) 112 | 113 | # parse syllables of groundtruth 114 | nestedPhonemeLists, numSyllables, numPhonemes = wordListsParseByLines(syllableList, phonemeList) 115 | 116 | return nestedPhonemeLists, numSyllables, numPhonemes 117 | 118 | -------------------------------------------------------------------------------- /neural_net/utils/textgridParser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys, os 3 | 4 | currentPath = os.path.dirname(__file__) 5 | utilsPath = os.path.join(currentPath, 'utils') 6 | sys.path.append(utilsPath) 7 | 8 | import neural_net.utils.textgrid as tgp 9 | 10 | 11 | def 
textGrid2WordList(textgrid_file, whichTier = 'pinyin', utf16 = True): 12 | ''' 13 | parse textGrid into a python list of tokens 14 | @param whichTier : 'pinyin' default tier name 15 | ''' 16 | if not os.path.isfile(textgrid_file): raise Exception("file {} not found".format(textgrid_file)) 17 | beginTsAndWordList = [] 18 | 19 | if utf16: 20 | par_obj = tgp.TextGrid.loadUTF16(textgrid_file) #loading the object 21 | else: 22 | par_obj = tgp.TextGrid.load(textgrid_file) #loading the object 23 | 24 | tiers = tgp.TextGrid._find_tiers(par_obj) #finding existing tiers 25 | 26 | isTierFound = False 27 | for tier in tiers: 28 | tierName= tier.tier_name().replace('.', '') 29 | #iterating over tiers and selecting the one specified 30 | if tierName == whichTier: 31 | isTierFound = True 32 | #this function parse the file nicely and return cool tuples 33 | tier_details = tier.make_simple_transcript() 34 | 35 | for line in tier_details: 36 | beginTsAndWordList.append([float(line[0]), float(line[1]), line[2]]) 37 | 38 | if not isTierFound: 39 | print ('Missing tier {1} in file {0}' .format(textgrid_file, whichTier)) 40 | 41 | return beginTsAndWordList, isTierFound 42 | 43 | 44 | def line2WordList(line, entireWordList): 45 | ''' 46 | find the nested wordList of entireWordList by line tuple 47 | :param line: line tuple [startTime, endTime, string] 48 | :param entireWordList: entire word list 49 | :return: nested wordList 50 | ''' 51 | nestedWordList = [] 52 | vault = False 53 | for wordlist in entireWordList: 54 | # the ending of the line 55 | if wordlist[1] == line[1]: 56 | nestedWordList.append(wordlist) 57 | break 58 | # the beginning of the line 59 | if wordlist[0] == line[0]: 60 | vault = True 61 | if vault == True: 62 | nestedWordList.append(wordlist) 63 | 64 | return nestedWordList 65 | 66 | def wordListsParseByLines(entireLine, entireWordList): 67 | ''' 68 | find the wordList for each line, cut the word list according to line 69 | :param entireLine: entire lines in line tier 70 | :param entirewWordList: entire word lists in pinyin tier 71 | :return: 72 | nestedWordLists: [[line0, wordList0], [line1, wordList1], ...] 
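(each wordListN holds the word tuples that fall within lineN)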
73 | numLines: sum of number of lines 74 | numWords: sum of number of words 75 | ''' 76 | nestedWordLists = [] 77 | numLines = 0 78 | numWords = 0 79 | 80 | for line in entireLine: 81 | # asciiLine=line[2].encode("ascii", "replace") 82 | if len(line[2].replace(" ", "")): # if line is not empty 83 | numLines += 1 84 | nestedWordList = [] 85 | wordList = line2WordList(line, entireWordList) 86 | for word in wordList: 87 | # asciiWord = word[2].encode("ascii", "replace") 88 | if len(word[2].replace(" ", "")): # if word is not empty 89 | numWords += 1 90 | nestedWordList.append(word) 91 | nestedWordLists.append([line,nestedWordList]) 92 | 93 | return nestedWordLists, numLines, numWords 94 | 95 | def syllableTextgridExtraction(textgrid_path, recording, tier0, tier1): 96 | 97 | ''' 98 | Extract syllable boundary and phoneme boundary from textgrid 99 | :param textgrid_path: 100 | :param recording: 101 | :param tier0: parent tier 102 | :param tier1: child tier which should be covered by parent tier 103 | :return: 104 | nestedPhonemeList, element[0] - syllable, element[1] - a list containing the phoneme of the syllable 105 | ''' 106 | 107 | textgrid_file = os.path.join(textgrid_path,recording+'.TextGrid') 108 | 109 | syllableList, _ = textGrid2WordList(textgrid_file, whichTier=tier0) 110 | phonemeList, _ = textGrid2WordList(textgrid_file, whichTier=tier1) 111 | 112 | # parse syllables of groundtruth 113 | nestedPhonemeLists, numSyllables, numPhonemes = wordListsParseByLines(syllableList, phonemeList) 114 | 115 | return nestedPhonemeLists, numSyllables, numPhonemes 116 | 117 | 118 | -------------------------------------------------------------------------------- /neural_net/keras_tcn/tcn/tcn.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras import optimizers 3 | from keras.layers import Conv1D, SpatialDropout1D 4 | from keras.layers import Activation, Lambda 5 | from keras.layers import Convolution1D, Dense 6 | from keras.models import Input, Model 7 | import keras.layers 8 | 9 | 10 | def channel_normalization(x): 11 | # Normalize by the highest activation 12 | max_values = K.max(K.abs(x), 2, keepdims=True) + 1e-5 13 | out = x / max_values 14 | return out 15 | 16 | 17 | def wave_net_activation(x): 18 | tanh_out = Activation('tanh')(x) 19 | sigm_out = Activation('sigmoid')(x) 20 | return keras.layers.multiply([tanh_out, sigm_out]) 21 | 22 | 23 | def residual_block(x, s, i, activation, nb_filters, kernel_size, dropout): 24 | original_x = x 25 | conv = Conv1D(filters=nb_filters, kernel_size=kernel_size, 26 | dilation_rate=2 ** i, padding='causal', 27 | name='dilated_conv_%d_tanh_s%d' % (2 ** i, s))(x) 28 | if activation == 'norm_relu': 29 | x = Activation('relu')(conv) 30 | x = Lambda(channel_normalization)(x) 31 | elif activation == 'wavenet': 32 | x = wave_net_activation(conv) 33 | else: 34 | x = Activation(activation)(conv) 35 | 36 | x = SpatialDropout1D(dropout)(x) 37 | 38 | # 1x1 conv. 39 | x = Convolution1D(nb_filters, 1, padding='same')(x) 40 | res_x = keras.layers.add([original_x, x]) 41 | return res_x, x 42 | 43 | 44 | def dilated_tcn(num_feat, num_classes, nb_filters, 45 | kernel_size, dilatations, nb_stacks, max_len, dropout, 46 | activation='wavenet', use_skip_connections=True, 47 | return_param_str=False, output_slice_index=None, 48 | regression=False): 49 | """ 50 | dilatations : dilation exponents; block i uses a causal convolution with dilation rate 2**i, so the receptive field grows exponentially with the length of the list 51 | nb_stacks : number of stacks. 
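        Example (a minimal sketch; the argument values are illustrative, not taken from the training scripts):
            model = dilated_tcn(num_feat=80, num_classes=2, nb_filters=32,
                                kernel_size=3, dilatations=[0, 1, 2, 3],
                                nb_stacks=1, max_len=None, dropout=0.05,
                                output_slice_index='last')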
52 | """ 53 | input_layer = Input(name='input_layer', shape=(max_len, num_feat)) 54 | x = input_layer 55 | x = Convolution1D(nb_filters, kernel_size, padding='causal', name='initial_conv')(x) 56 | 57 | skip_connections = [] 58 | for s in range(nb_stacks): 59 | for i in dilatations: 60 | x, skip_out = residual_block(x, s, i, activation, nb_filters, kernel_size, dropout) 61 | skip_connections.append(skip_out) 62 | 63 | if use_skip_connections: 64 | x = keras.layers.add(skip_connections) 65 | x = Activation('relu')(x) 66 | 67 | if output_slice_index is not None: # can test with 0 or -1. 68 | if output_slice_index == 'last': 69 | output_slice_index = -1 70 | if output_slice_index == 'first': 71 | output_slice_index = 0 72 | x = Lambda(lambda tt: tt[:, output_slice_index, :])(x) 73 | 74 | print('x.shape=', x.shape) 75 | 76 | if not regression: 77 | if num_classes == 2: 78 | x = Dense(1)(x) 79 | x = Activation('sigmoid', name='output_sigmoid')(x) 80 | output_layer = x 81 | print(f'model.x = {input_layer.shape}') 82 | print(f'model.y = {output_layer.shape}') 83 | model = Model(input_layer, output_layer) 84 | adam = optimizers.Adam(lr=0.002, clipnorm=1.) 85 | model.compile(adam, loss='binary_crossentropy', metrics=['accuracy']) 86 | print('Adam with norm clipping.') 87 | elif num_classes > 2: 88 | # classification 89 | x = Dense(num_classes)(x) 90 | x = Activation('softmax', name='output_softmax')(x) 91 | output_layer = x 92 | print(f'model.x = {input_layer.shape}') 93 | print(f'model.y = {output_layer.shape}') 94 | model = Model(input_layer, output_layer) 95 | 96 | adam = optimizers.Adam(lr=0.002, clipnorm=1.) 97 | model.compile(adam, loss='sparse_categorical_crossentropy', metrics=['accuracy']) 98 | print('Adam with norm clipping.') 99 | else: 100 | raise ValueError 101 | else: 102 | # regression 103 | x = Dense(1)(x) 104 | x = Activation('linear', name='output_dense')(x) 105 | output_layer = x 106 | print(f'model.x = {input_layer.shape}') 107 | print(f'model.y = {output_layer.shape}') 108 | model = Model(input_layer, output_layer) 109 | adam = optimizers.Adam(lr=0.002, clipnorm=1.) 
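# regression head: a single linear output unit trained with mean squared error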
110 | model.compile(adam, loss='mean_squared_error') 111 | 112 | if return_param_str: 113 | param_str = 'D-TCN_C{}_B{}_L{}'.format(2, nb_stacks, dilatations) 114 | return model, param_str 115 | else: 116 | return model 117 | -------------------------------------------------------------------------------- /neural_net/data/mispronunciation_filelist_train.csv: -------------------------------------------------------------------------------- 1 | part1,,danAll,dafeh-Bi_yun_tian-Xi_xiang_ji01-qm,, 2 | part1,,danAll,danbz-Bei_jiu_chan-Chun_gui_men01-qm,, 3 | part1,,danAll,danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm,, 4 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm,, 5 | part1,,danAll,daxp-Chun_qiu_ting-Suo_lin_nang01-qm,, 6 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang01-qm,, 7 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang04-qm,, 8 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm,, 9 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm,, 10 | part1,,danAll,daxp-Zhe_cai_shi-Suo_lin_nang01-qm,, 11 | part1,,laosheng,lseh-Tan_Yang_jia-Hong_yang_dong-qm,, 12 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong02-qm,, 13 | part1,,laosheng,lseh-Wo_ben_shi-Qiong_lin_yan-qm,, 14 | part1,,laosheng,lseh-Yi_lun_ming-Wen_zhao_guan-qm,, 15 | part1,,laosheng,lseh-Zi_na_ri-Hong_yang_dong-qm,, 16 | part1,,laosheng,lsxp-Guo_liao_yi-Wen_zhao_guan02-qm,, 17 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying02-qm,, 18 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian02-qm,, 19 | part1,,laosheng,lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm,, 20 | part1,,laosheng,lsxp-Quan_qian_sui-Gan_lu_si-qm,, 21 | part1,,laosheng,lsxp-Shi_ye_shuo-Ding_jun_shan-qm,, 22 | part1,,laosheng,lsxp-Wo_ben_shi-Kong_cheng_ji-qm,, 23 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm,, 24 | part1,,laosheng,lsxp-Xi_ri_you-Zhu_lian_zhai-qm,, 25 | part1,,danAll,daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon,, 26 | part1,,danAll,daspd-Du_shou_kong-Wang_jiang_ting-upf,, 27 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon,, 28 | part1,,danAll,daxp-Guan_Shi_yin-Tian_nv_san_hua-lon,, 29 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf,, 30 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong01-lon,, 31 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying01-lon,, 32 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-1-upf,, 33 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-2-upf,, 34 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf,, 35 | part2,,20170327LiaoJiaNi,lsxp-Yi_ma_li-Wu_jia_po-nacta,, 36 | part2,,20170327LiaoJiaNi,lseh-Niang_zi_bu-Sou_gu_jiu-nacta,, 37 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,teacher,, 38 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,teacher,, 39 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,teacher,, 40 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,teacher,, 41 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,teacher,, 42 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,teacher,, 43 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,teacher,, 44 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,teacher,, 45 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,teacher,, 46 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,teacher,, 47 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,teacher,, 48 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,teacher,, 49 | 
part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,teacher,, 50 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student01,, 51 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02_first_half,, 52 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02,, 53 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student03,, 54 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student04,, 55 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student05,, 56 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student06,, 57 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student01,, 58 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student02,, 59 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student03,, 60 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student04,, 61 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student05,, 62 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student06,, 63 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_01,, 64 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_02,, 65 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_03,, 66 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,student_01,, 67 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_01,, 68 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_03,, 69 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_01,, 70 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_02,, 71 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_01,, 72 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_02,, 73 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,student_01_sizhu,, -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/mispronunciation_filelist_train.csv: -------------------------------------------------------------------------------- 1 | part1,,danAll,dafeh-Bi_yun_tian-Xi_xiang_ji01-qm,, 2 | part1,,danAll,danbz-Bei_jiu_chan-Chun_gui_men01-qm,, 3 | part1,,danAll,danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm,, 4 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm,, 5 | part1,,danAll,daxp-Chun_qiu_ting-Suo_lin_nang01-qm,, 6 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang01-qm,, 7 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang04-qm,, 8 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm,, 9 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm,, 10 | part1,,danAll,daxp-Zhe_cai_shi-Suo_lin_nang01-qm,, 11 | part1,,laosheng,lseh-Tan_Yang_jia-Hong_yang_dong-qm,, 12 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong02-qm,, 13 | part1,,laosheng,lseh-Wo_ben_shi-Qiong_lin_yan-qm,, 14 | part1,,laosheng,lseh-Yi_lun_ming-Wen_zhao_guan-qm,, 15 | part1,,laosheng,lseh-Zi_na_ri-Hong_yang_dong-qm,, 16 | part1,,laosheng,lsxp-Guo_liao_yi-Wen_zhao_guan02-qm,, 17 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying02-qm,, 18 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian02-qm,, 19 | part1,,laosheng,lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm,, 20 | part1,,laosheng,lsxp-Quan_qian_sui-Gan_lu_si-qm,, 21 | part1,,laosheng,lsxp-Shi_ye_shuo-Ding_jun_shan-qm,, 22 | part1,,laosheng,lsxp-Wo_ben_shi-Kong_cheng_ji-qm,, 23 | 
part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm,, 24 | part1,,laosheng,lsxp-Xi_ri_you-Zhu_lian_zhai-qm,, 25 | part1,,danAll,daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon,, 26 | part1,,danAll,daspd-Du_shou_kong-Wang_jiang_ting-upf,, 27 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon,, 28 | part1,,danAll,daxp-Guan_Shi_yin-Tian_nv_san_hua-lon,, 29 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf,, 30 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong01-lon,, 31 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying01-lon,, 32 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-1-upf,, 33 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-2-upf,, 34 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf,, 35 | part2,,20170327LiaoJiaNi,lsxp-Yi_ma_li-Wu_jia_po-nacta,, 36 | part2,,20170327LiaoJiaNi,lseh-Niang_zi_bu-Sou_gu_jiu-nacta,, 37 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,teacher,, 38 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,teacher,, 39 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,teacher,, 40 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,teacher,, 41 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,teacher,, 42 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,teacher,, 43 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,teacher,, 44 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,teacher,, 45 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,teacher,, 46 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,teacher,, 47 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,teacher,, 48 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,teacher,, 49 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,teacher,, 50 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student01,, 51 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02_first_half,, 52 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02,, 53 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student03,, 54 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student04,, 55 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student05,, 56 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student06,, 57 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student01,, 58 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student02,, 59 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student03,, 60 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student04,, 61 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student05,, 62 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student06,, 63 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_01,, 64 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_02,, 65 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_03,, 66 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,student_01,, 67 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_01,, 68 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_03,, 69 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_01,, 70 | 
part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_02,, 71 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_01,, 72 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_02,, 73 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,student_01_sizhu,, -------------------------------------------------------------------------------- /neural_net/training_scripts/attention.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from keras import backend as K, initializers, regularizers, constraints 3 | from keras.engine.topology import Layer 4 | 5 | 6 | def dot_product(x, kernel): 7 | """ 8 | Wrapper for dot product operation, in order to be compatible with both 9 | Theano and Tensorflow 10 | Args: 11 | x (): input 12 | kernel (): weights 13 | Returns: 14 | """ 15 | if K.backend() == 'tensorflow': 16 | # todo: check that this is correct 17 | kernel = K.expand_dims(kernel) 18 | return K.squeeze(K.dot(x, kernel), axis=-1) 19 | else: 20 | return K.dot(x, kernel) 21 | 22 | 23 | class Attention(Layer): 24 | def __init__(self, 25 | W_regularizer=None, b_regularizer=None, 26 | W_constraint=None, b_constraint=None, 27 | bias=True, 28 | return_attention=False, 29 | **kwargs): 30 | """ 31 | Keras Layer that implements an Attention mechanism for temporal data. 32 | Supports Masking. 33 | Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756] 34 | # Input shape 35 | 3D tensor with shape: `(samples, steps, features)`. 36 | # Output shape 37 | 2D tensor with shape: `(samples, features)`. 38 | :param kwargs: 39 | Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True. 40 | The dimensions are inferred based on the output shape of the RNN. 41 | Note: The layer has been tested with Keras 1.x 42 | Example: 43 | # 1 44 | model.add(LSTM(64, return_sequences=True)) 45 | model.add(Attention()) 46 | # next add a Dense layer (for classification/regression) or whatever... 47 | # 2 - Get the attention scores 48 | hidden = LSTM(64, return_sequences=True)(words) 49 | sentence, word_scores = Attention(return_attention=True)(hidden) 50 | """ 51 | self.supports_masking = True 52 | self.return_attention = return_attention 53 | self.init = initializers.get('glorot_uniform') 54 | 55 | self.W_regularizer = regularizers.get(W_regularizer) 56 | self.b_regularizer = regularizers.get(b_regularizer) 57 | 58 | self.W_constraint = constraints.get(W_constraint) 59 | self.b_constraint = constraints.get(b_constraint) 60 | 61 | self.bias = bias 62 | super(Attention, self).__init__(**kwargs) 63 | 64 | def build(self, input_shape): 65 | assert len(input_shape) == 3 66 | 67 | self.W = self.add_weight((input_shape[-1],), 68 | initializer=self.init, 69 | name='{}_W'.format(self.name), 70 | regularizer=self.W_regularizer, 71 | constraint=self.W_constraint) 72 | if self.bias: 73 | self.b = self.add_weight((1,), 74 | initializer='zero', 75 | name='{}_b'.format(self.name), 76 | regularizer=self.b_regularizer, 77 | constraint=self.b_constraint) 78 | else: 79 | self.b = None 80 | 81 | self.built = True 82 | 83 | def compute_mask(self, input, input_mask=None): 84 | # do not pass the mask to the next layers 85 | return None 86 | 87 | def call(self, x, mask=None): 88 | eij = dot_product(x, self.W) # (samples, steps) 89 | 90 | if self.bias: 91 | eij += self.b 92 | 93 | eij = K.tanh(eij) 94 | 95 | a = K.exp(eij) 96 | 97 | # apply mask after the exp. 
will be re-normalized next 98 | if mask is not None: 99 | # Cast the mask to floatX to avoid float64 upcasting in theano 100 | a *= K.cast(mask, K.floatx()) 101 | 102 | # in some cases especially in the early stages of training the sum may be almost zero 103 | # and this results in NaN's. A workaround is to add a very small positive number ε to the sum. 104 | # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx()) 105 | a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) 106 | 107 | a_expand = K.expand_dims(a) 108 | 109 | # element wise 110 | weighted_input = x * a_expand 111 | 112 | result = K.sum(weighted_input, axis=1) 113 | 114 | if self.return_attention: 115 | return [result, a] 116 | return result 117 | 118 | def compute_output_shape(self, input_shape): 119 | if self.return_attention: 120 | return [(input_shape[0], input_shape[-1]), 121 | (input_shape[0], input_shape[1])] 122 | else: 123 | return input_shape[0], input_shape[-1] -------------------------------------------------------------------------------- /neural_net/training_scripts/hpc_code/train_run_jianzi_tcn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pickle 4 | import numpy as np 5 | from sklearn.model_selection import StratifiedKFold 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 10 | 11 | from neural_net.training_scripts.models_TCN import train_TCN_batch 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model 13 | from neural_net.combine_feature_label import combine_feature_label 14 | from neural_net.file_path import * 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | cv_prod = "cv" 20 | batch_size = 1 21 | input_shape = (batch_size, None, 80) 22 | patience = 15 23 | attention = False 24 | dropout = 0.05 25 | epoch = 500 26 | 27 | path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/' 28 | 29 | with open(dict_jianzi_positive, "rb") as f: 30 | feature_jianzi_pos = pickle.load(f) 31 | 32 | with open(dict_jianzi_negative, "rb") as f: 33 | feature_jianzi_neg = pickle.load(f) 34 | 35 | X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos, 36 | dict_negative=feature_jianzi_neg) 37 | 38 | if cv_prod == "cv": 39 | list_loss = [] 40 | list_acc = [] 41 | skf = StratifiedKFold(n_splits=5) 42 | for ii, (train_index, val_index) in enumerate(skf.split(X_jianzi, y_jianzi)): 43 | 44 | model_name = 'jianzi_model_tcn_1_stack_3_{}'.format(dropout) 45 | file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5') 46 | file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv') 47 | 48 | print("TRAIN:", train_index, "TEST:", val_index) 49 | 50 | X_train, X_test = [X_jianzi[ii] for ii in train_index], [X_jianzi[ii] for ii in val_index] 51 | y_train, y_test = y_jianzi[train_index], y_jianzi[val_index] 52 | 53 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1) 54 | 55 | # standarization 56 | scaler = StandardScaler() 57 | X_train_conc = np.concatenate(X_train) 58 | scaler.fit(X_train_conc) 59 | 60 | model = train_TCN_batch(list_feature_fold_train=X_train, 61 | labels_fold_train=y_train, 62 | list_feature_fold_val=X_val, 63 | labels_fold_val=y_val, 64 | batch_size=batch_size, 65 | input_shape=input_shape, 66 | 
file_path_model=file_path_model, 67 | filename_log=file_path_log, 68 | epoch=epoch, 69 | patience=patience, 70 | scaler=scaler, 71 | dropout=dropout, 72 | summ=True, 73 | verbose=2) 74 | 75 | loss_test = eval_RNN_model(list_feature_test=X_test, 76 | labels_test=y_test, 77 | file_path_model=file_path_model, 78 | attention=attention, 79 | scaler=scaler) 80 | 81 | list_loss.append(loss_test) 82 | 83 | with open(os.path.join(path_model, 'log', 'jianzi_results_tcn_1_stack_3_{}.txt'.format(dropout)), 'w') as f: 84 | f.write("loss {}".format(np.mean(list_loss))) 85 | 86 | elif cv_prod == "prod": 87 | X_train, X_val, y_train, y_val = train_test_split(X_jianzi, y_jianzi, stratify=y_jianzi, test_size=0.1) 88 | 89 | model_name = 'jianzi_model_prod_tcn_{}'.format(dropout) 90 | file_path_model = os.path.join(path_model, model_name + '.h5') 91 | file_path_log = os.path.join(path_model, 'log', model_name + '.csv') 92 | 93 | # standarization 94 | scaler = StandardScaler() 95 | X_train_conc = np.concatenate(X_train) 96 | scaler.fit(X_train_conc) 97 | 98 | train_TCN_batch(list_feature_fold_train=X_train, 99 | labels_fold_train=y_train, 100 | list_feature_fold_val=X_val, 101 | labels_fold_val=y_val, 102 | batch_size=batch_size, 103 | input_shape=input_shape, 104 | file_path_model=file_path_model, 105 | filename_log=file_path_log, 106 | epoch=epoch, 107 | patience=patience, 108 | scaler=scaler, 109 | dropout=dropout, 110 | summ=True, 111 | verbose=2) 112 | else: 113 | raise ValueError("{} is not a valid option.".format(cv_prod)) 114 | -------------------------------------------------------------------------------- /neural_net/training_scripts/hpc_code/train_run_special_tcn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pickle 4 | import numpy as np 5 | from sklearn.model_selection import StratifiedKFold 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 10 | 11 | from neural_net.training_scripts.models_TCN import train_TCN_batch 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model 13 | from neural_net.combine_feature_label import combine_feature_label 14 | from neural_net.file_path import * 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | cv_prod = "cv" 20 | batch_size = 1 21 | input_shape = (batch_size, None, 80) 22 | patience = 15 23 | attention = False 24 | dropout = 0.05 25 | epoch = 500 26 | 27 | path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/' 28 | 29 | with open(dict_special_positive, "rb") as f: 30 | feature_special_pos = pickle.load(f) 31 | 32 | with open(dict_special_negative, "rb") as f: 33 | feature_special_neg = pickle.load(f) 34 | 35 | X_special, y_special = combine_feature_label(dict_positive=feature_special_pos, 36 | dict_negative=feature_special_neg) 37 | 38 | if cv_prod == "cv": 39 | list_loss = [] 40 | list_acc = [] 41 | skf = StratifiedKFold(n_splits=5) 42 | for ii, (train_index, val_index) in enumerate(skf.split(X_special, y_special)): 43 | 44 | model_name = 'special_model_tcn_1_stack_3_{}'.format(dropout) 45 | file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5') 46 | file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv') 47 | 48 | print("TRAIN:", train_index, "TEST:", val_index) 49 | 50 | X_train, X_test = [X_special[ii] for ii in train_index], 
[X_special[ii] for ii in val_index] 51 | y_train, y_test = y_special[train_index], y_special[val_index] 52 | 53 | X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1) 54 | 55 | # standarization 56 | scaler = StandardScaler() 57 | X_train_conc = np.concatenate(X_train) 58 | scaler.fit(X_train_conc) 59 | 60 | model = train_TCN_batch(list_feature_fold_train=X_train, 61 | labels_fold_train=y_train, 62 | list_feature_fold_val=X_val, 63 | labels_fold_val=y_val, 64 | batch_size=batch_size, 65 | input_shape=input_shape, 66 | file_path_model=file_path_model, 67 | filename_log=file_path_log, 68 | epoch=epoch, 69 | patience=patience, 70 | scaler=scaler, 71 | dropout=dropout, 72 | summ=True, 73 | verbose=2) 74 | 75 | loss_test = eval_RNN_model(list_feature_test=X_test, 76 | labels_test=y_test, 77 | file_path_model=file_path_model, 78 | attention=attention, 79 | scaler=scaler) 80 | 81 | list_loss.append(loss_test) 82 | 83 | with open(os.path.join(path_model, 'log', 'special_results_tcn_1_stack_3_{}.txt'.format(dropout)), 'w') as f: 84 | f.write("loss {}".format(np.mean(list_loss))) 85 | 86 | elif cv_prod == "prod": 87 | X_train, X_val, y_train, y_val = train_test_split(X_special, y_special, stratify=y_special, test_size=0.1) 88 | 89 | model_name = 'special_model_prod_tcn_{}'.format(dropout) 90 | file_path_model = os.path.join(path_model, model_name + '.h5') 91 | file_path_log = os.path.join(path_model, 'log', model_name + '.csv') 92 | 93 | # standarization 94 | scaler = StandardScaler() 95 | X_train_conc = np.concatenate(X_train) 96 | scaler.fit(X_train_conc) 97 | 98 | train_TCN_batch(list_feature_fold_train=X_train, 99 | labels_fold_train=y_train, 100 | list_feature_fold_val=X_val, 101 | labels_fold_val=y_val, 102 | batch_size=batch_size, 103 | input_shape=input_shape, 104 | file_path_model=file_path_model, 105 | filename_log=file_path_log, 106 | epoch=epoch, 107 | patience=patience, 108 | scaler=scaler, 109 | dropout=dropout, 110 | summ=True, 111 | verbose=2) 112 | else: 113 | raise ValueError("{} is not a valid option.".format(cv_prod)) 114 | -------------------------------------------------------------------------------- /neural_net/viterbiDecodingPhonemeSeg.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm 3 | from neural_net.parameters import hopsize_t 4 | cimport cython 5 | 6 | value_eps = np.finfo(float).eps 7 | 8 | def FdurationProba2( syllable_duration, param_s ): 9 | 10 | M1 = syllable_duration/hopsize_t 11 | 12 | # % delta 13 | if param_s['delta_mode'] == 'constant': 14 | delta = param_s['delta'] 15 | elif param_s['delta_mode'] == 'proportion': 16 | delta = syllable_duration * param_s['delta'] 17 | else: 18 | raise ValueError("delta_mode should be either 'constant' or 'proportion'.") 19 | S1 = delta/hopsize_t 20 | 21 | # % duration max is three times of standard deviation 22 | duration_max = syllable_duration + 3.0*delta 23 | 24 | tmin = 0 25 | 26 | tmax = int(duration_max/hopsize_t) 27 | 28 | # Ps = pdf('Normal',(tmin : tmax), M1, S1) 29 | x = range(tmin, tmax) 30 | Ps = norm.pdf(x, M1, S1) 31 | return Ps, tmin, tmax 32 | 33 | @cython.cdivision(True) 34 | @cython.boundscheck(False) 35 | @cython.wraparound(False) 36 | def viterbiSegmental2(P, sd, param_s): 37 | """ 38 | :param P: NxT emission probability state sequence (P(j,t) = emission probability of symbol j at time t) 39 | :param sd: 1xT score duration array 40 | :param param_s: 41 | :return: 42 | """ 43 
 44 |     # prevent singularities
 45 |     P[P == 0] = value_eps
 46 |
 47 |     i_bound = np.where(P > value_eps)[0]
 48 |     N = len(i_bound)
 49 |     T = len(sd)
 50 |
 51 |     # log-likelihood
 52 |     delta = np.zeros((N, T), dtype=np.double)
 53 |     psi = np.zeros((N, T), dtype=np.double)
 54 |     logP = np.log(P, dtype=np.double)
 55 |
 56 |     # duration probability
 57 |     Ps, _, _ = FdurationProba2(sd[0], param_s)
 58 |     Ps[Ps == 0] = value_eps
 59 |     C = len(Ps)
 60 |     logPs = np.log(Ps, dtype=np.double)
 61 |
 62 |     cdef double [:, ::1] cdelta = delta
 63 |     cdef double [:, ::1] cpsi = psi
 64 |     cdef double [::1] clogP = logP
 65 |     cdef double [::1] clogPs = logPs
 66 |     cdef int [::1] ci_bound = np.array(i_bound, dtype=np.intc)
 67 |     # % % % % % % % % % % % % % % % % % %
 68 |     # %       Initialisation            %
 69 |     # % % % % % % % % % % % % % % % % % %
 70 |
 71 |     # % not a possible transition from > 0 time to 1
 72 |     cdelta[0, 0] = -np.inf
 73 |     cpsi[:,0] = 0
 74 |     for jj in range(1,N):
 75 |         d = ci_bound[jj] - ci_bound[0]
 76 |         # print(jj, i_bound[jj], d, C)
 77 |         if d >= C:
 78 |             cdelta[jj, 0] = -np.inf
 79 |         else:
 80 |             cdelta[jj, 0] = clogPs[d] + clogP[i_bound[jj]]
 81 |
 82 |     clogPs = None
 83 |
 84 |     # % % % % % % % % % % % % % % % % % %
 85 |     # %         Recursion               %
 86 |     # % % % % % % % % % % % % % % % % % %
 87 |     delta_current = np.zeros((N,), dtype=np.double)
 88 |     cdef double [::1] cdelta_current = delta_current
 89 |
 90 |     for t in range(1,T - 1):
 91 |         # print(t)
 92 |         # % duration probability
 93 |         Ps, _, _ = FdurationProba2(sd[t], param_s)
 94 |         Ps[Ps == 0] = value_eps
 95 |         C = len(Ps)
 96 |         logPs = np.log(Ps, dtype=np.double)
 97 |
 98 |         for jj in range(N):
 99 |             for ii in range(N):
 100 |                 # print(i_bound, jj, ii)
 101 |                 d = ci_bound[jj] - ci_bound[ii]
 102 |                 # print(d, C)
 103 |                 if d >= C or d <= 0:
 104 |                     cdelta_current[ii] = -np.inf
 105 |                 else:
 106 |                     cdelta_current[ii] = cdelta[ii, t - 1] + logPs[d]
 107 |
 108 |             I_delta = np.argmax(cdelta_current)
 109 |             M_delta = cdelta_current[I_delta]
 110 |             cdelta[jj, t] = M_delta + clogP[i_bound[jj]]  # add the emission term, because it is constant for state jj
 111 |             cpsi[jj, t] = I_delta
 112 |
 113 |     # % duration probability
 114 |     Ps, tmin, tmax = FdurationProba2(sd[T-1], param_s)
 115 |     Ps[Ps == 0] = value_eps
 116 |     C = len(Ps)
 117 |     logPs = np.log(Ps, dtype=np.double)
 118 |     clogPs = logPs
 119 |     # delta_current = np.zeros((N,))
 120 |
 121 |     for ii in range(N):
 122 |         d = ci_bound[N-1] - ci_bound[ii]
 123 |         if d >= C or d <= 0:
 124 |             cdelta_current[ii] = -np.inf
 125 |         else:
 126 |             cdelta_current[ii] = cdelta[ii, T-2] + clogPs[d]
 127 |
 128 |     I_delta = np.argmax(cdelta_current)
 129 |     M_delta = cdelta_current[I_delta]  # the posterior probability
 130 |     cdelta[N-1, T-1] = M_delta + clogP[i_bound[N-1]]
 131 |     cpsi[N-1, T-1] = I_delta
 132 |
 133 |     # % % % % % % % % % % % % % % % % % %
 134 |     # %         Backtrack               %
 135 |     # % % % % % % % % % % % % % % % % % %
 136 |     i_best_sequence = np.zeros((T+1,),dtype=int)
 137 |     # print(i_best_sequence)
 138 |     i_best_sequence[T] = N-1
 139 |     for t in range(T)[::-1]:
 140 |         # print(t+1, i_best_sequence[t+1])
 141 |         i_best_sequence[t] = int(cpsi[int(i_best_sequence[t + 1]), t])
 142 |     # print(i_best_sequence)
 143 |     i_boundary = [i_bound[ii] for ii in i_best_sequence]
 144 |
 145 |     cdelta = None
 146 |     cdelta_current = None
 147 |     clogPs = None
 148 |     clogP = None
 149 |     cpsi = None
 150 |     ci_bound = None
 151 |
 152 |     return i_boundary
 153 |
--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_jianzi.py:
--------------------------------------------------------------------------------
 1 |
import sys
 2 | import os
 3 | import pickle
 4 | import numpy as np
 5 | from sklearn.model_selection import StratifiedKFold
 6 | from sklearn.model_selection import train_test_split
 7 | from sklearn.preprocessing import StandardScaler
 8 |
 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 |
 11 | from neural_net.training_scripts.models_RNN import train_RNN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 |
 16 |
 17 | if __name__ == '__main__':
 18 |
 19 |     cv_prod = "prod"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = "feedforward"
 24 |     conv = True
 25 |     dropout = 0.5
 26 |     epoch = 500
 27 |
 28 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 29 |
 30 |     with open(dict_jianzi_positive, "rb") as f:
 31 |         feature_jianzi_pos = pickle.load(f)
 32 |
 33 |     with open(dict_jianzi_negative, "rb") as f:
 34 |         feature_jianzi_neg = pickle.load(f)
 35 |
 36 |     X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos,
 37 |                                                dict_negative=feature_jianzi_neg)
 38 |
 39 |     if cv_prod == "cv":
 40 |         list_loss = []
 41 |         list_acc = []
 42 |         skf = StratifiedKFold(n_splits=5)
 43 |         for ii, (train_index, val_index) in enumerate(skf.split(X_jianzi, y_jianzi)):
 44 |
 45 |             model_name = 'jianzi_model_{}_{}_{}'.format(attention, conv, dropout)
 46 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 47 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 48 |
 49 |             print("TRAIN:", train_index, "TEST:", val_index)
 50 |
 51 |             X_train, X_test = [X_jianzi[ii] for ii in train_index], [X_jianzi[ii] for ii in val_index]
 52 |             y_train, y_test = y_jianzi[train_index], y_jianzi[val_index]
 53 |
 54 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 55 |
 56 |             # standardization
 57 |             scaler = StandardScaler()
 58 |             X_train_conc = np.concatenate(X_train)
 59 |             scaler.fit(X_train_conc)
 60 |
 61 |             model = train_RNN_batch(list_feature_fold_train=X_train,
 62 |                                     labels_fold_train=y_train,
 63 |                                     list_feature_fold_val=X_val,
 64 |                                     labels_fold_val=y_val,
 65 |                                     batch_size=batch_size,
 66 |                                     input_shape=input_shape,
 67 |                                     output_shape=1,
 68 |                                     file_path_model=file_path_model,
 69 |                                     filename_log=file_path_log,
 70 |                                     epoch=epoch,
 71 |                                     patience=patience,
 72 |                                     scaler=scaler,
 73 |                                     attention=attention,
 74 |                                     conv=conv,
 75 |                                     dropout=dropout,
 76 |                                     summ=True,
 77 |                                     verbose=2)
 78 |
 79 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 80 |                                        labels_test=y_test,
 81 |                                        file_path_model=file_path_model,
 82 |                                        attention=attention,
 83 |                                        scaler=scaler)
 84 |
 85 |             list_loss.append(loss_test)
 86 |
 87 |         with open(os.path.join(path_model, 'log', 'jianzi_results_{}_{}_{}.txt'.format(attention, conv, dropout)), 'w') as f:
 88 |             f.write("attention {} conv {} dropout {} loss {}".format(attention, conv, dropout, np.mean(list_loss)))
 89 |
 90 |     elif cv_prod == "prod":
 91 |         X_train, X_val, y_train, y_val = train_test_split(X_jianzi, y_jianzi, stratify=y_jianzi, test_size=0.1)
 92 |
 93 |         model_name = 'jianzi_model_prod_{}_{}_{}'.format(attention, conv, dropout)
 94 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 95 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 96 |
 97 |         # standardization
 98 |         scaler = StandardScaler()
 99 |         X_train_conc = np.concatenate(X_train)
 100 |         scaler.fit(X_train_conc)
 101 |
 102 |         train_RNN_batch(list_feature_fold_train=X_train,
 103 |                         labels_fold_train=y_train,
 104 |                         list_feature_fold_val=X_val,
 105 |                         labels_fold_val=y_val,
 106 |                         batch_size=batch_size,
 107 |                         input_shape=input_shape,
 108 |                         output_shape=1,
 109 |                         file_path_model=file_path_model,
 110 |                         filename_log=file_path_log,
 111 |                         epoch=epoch,
 112 |                         patience=patience,
 113 |                         scaler=scaler,
 114 |                         attention=attention,
 115 |                         conv=conv,
 116 |                         dropout=dropout,
 117 |                         summ=True,
 118 |                         verbose=2)
 119 |     else:
 120 |         raise ValueError("{} is not a valid option.".format(cv_prod))
 121 |
--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_special.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import pickle
 4 | import numpy as np
 5 | from sklearn.model_selection import StratifiedKFold
 6 | from sklearn.model_selection import train_test_split
 7 | from sklearn.preprocessing import StandardScaler
 8 |
 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 |
 11 | from neural_net.training_scripts.models_RNN import train_RNN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 |
 16 |
 17 | if __name__ == '__main__':
 18 |
 19 |     cv_prod = "cv"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = "selfatt"
 24 |     conv = True
 25 |     dropout = 0.5
 26 |     epoch = 500
 27 |
 28 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 29 |
 30 |     with open(dict_special_positive, "rb") as f:
 31 |         feature_special_pos = pickle.load(f)
 32 |
 33 |     with open(dict_special_negative, "rb") as f:
 34 |         feature_special_neg = pickle.load(f)
 35 |
 36 |     X_special, y_special = combine_feature_label(dict_positive=feature_special_pos,
 37 |                                                  dict_negative=feature_special_neg)
 38 |
 39 |     if cv_prod == "cv":
 40 |         list_loss = []
 41 |         list_acc = []
 42 |         skf = StratifiedKFold(n_splits=5)
 43 |         for ii, (train_index, val_index) in enumerate(skf.split(X_special, y_special)):
 44 |
 45 |             model_name = 'special_model_{}_{}_{}'.format(attention, conv, dropout)
 46 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 47 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 48 |
 49 |             print("TRAIN:", train_index, "TEST:", val_index)
 50 |
 51 |             X_train, X_test = [X_special[ii] for ii in train_index], [X_special[ii] for ii in val_index]
 52 |             y_train, y_test = y_special[train_index], y_special[val_index]
 53 |
 54 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 55 |
 56 |             # standardization
 57 |             scaler = StandardScaler()
 58 |             X_train_conc = np.concatenate(X_train)
 59 |             scaler.fit(X_train_conc)
 60 |
 61 |             model = train_RNN_batch(list_feature_fold_train=X_train,
 62 |                                     labels_fold_train=y_train,
 63 |                                     list_feature_fold_val=X_val,
 64 |                                     labels_fold_val=y_val,
 65 |                                     batch_size=batch_size,
 66 |                                     input_shape=input_shape,
 67 |                                     output_shape=1,
 68 |                                     file_path_model=file_path_model,
 69 |                                     filename_log=file_path_log,
 70 |                                     epoch=epoch,
 71 |                                     patience=patience,
 72 |                                     scaler=scaler,
 73 |                                     attention=attention,
 74 |                                     conv=conv,
 75 |                                     dropout=dropout,
 76 |                                     summ=True,
 77 |                                     verbose=2)
 78 |
 79 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 80 |                                        labels_test=y_test,
 81 |                                        file_path_model=file_path_model,
 82 |                                        attention=attention,
 83 |                                        scaler=scaler)
 84 |
 85 |             list_loss.append(loss_test)
 86 |
 87 |         with open(os.path.join(path_model, 'log', 'special_results_{}_{}_{}.txt'.format(attention, conv, dropout)), 'w') as f:
 88 |             f.write("attention {} conv {} dropout {} loss {}".format(attention, conv, dropout, np.mean(list_loss)))
 89 |
 90 |     elif cv_prod == "prod":
 91 |         X_train, X_val, y_train, y_val = train_test_split(X_special, y_special, stratify=y_special, test_size=0.1)
 92 |
 93 |         model_name = 'special_model_prod_{}_{}_{}'.format(attention, conv, dropout)
 94 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 95 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 96 |
 97 |         # standardization
 98 |         scaler = StandardScaler()
 99 |         X_train_conc = np.concatenate(X_train)
 100 |         scaler.fit(X_train_conc)
 101 |
 102 |         train_RNN_batch(list_feature_fold_train=X_train,
 103 |                         labels_fold_train=y_train,
 104 |                         list_feature_fold_val=X_val,
 105 |                         labels_fold_val=y_val,
 106 |                         batch_size=batch_size,
 107 |                         input_shape=input_shape,
 108 |                         output_shape=1,
 109 |                         file_path_model=file_path_model,
 110 |                         filename_log=file_path_log,
 111 |                         epoch=epoch,
 112 |                         patience=patience,
 113 |                         scaler=scaler,
 114 |                         attention=attention,
 115 |                         conv=conv,
 116 |                         dropout=dropout,
 117 |                         summ=True,
 118 |                         verbose=2)
 119 |     else:
 120 |         raise ValueError("{} is not a valid option.".format(cv_prod))
 121 |
--------------------------------------------------------------------------------
/neural_net/training_sample_collection_syllable.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle
 3 | import json
 4 | from neural_net.parameters import *
 5 | from neural_net.file_path import *
 6 | from neural_net.utils.audio_preprocessing import get_log_mel_madmom
 7 | from neural_net.file_path import parse_recordings
 8 | from neural_net.utils.textgrid_preprocessing import parse_syllable_line_list
 9 |
 10 |
 11 | def dump_feature_syllable(recordings, list_normal_special, list_normal_jianzi):
 12 |
 13 |     # feature dictionary
 14 |     dic_syllable_special = {}
 15 |     dic_syllable_jianzi = {}
 16 |     dic_syllable_special_normal = {}
 17 |     dic_syllable_jianzi_normal = {}
 18 |     num_special, num_jianzi = 0, 0  # fix: these counters are incremented below but were never initialized
 19 |
 20 |     for rec in recordings:
 21 |         data_path, sub_folder, textgrid_folder, \
 22 |             wav_folder, filename, line_tier, longsyllable_tier, syllable_tier, \
 23 |             phoneme_tier, special_tier, special_class_tier, roletype = parse_recordings(rec)
 24 |
 25 |         wav_filename = os.path.join(path_root, data_path, wav_folder, sub_folder, filename + ".wav")
 26 |         textgrid_filename = os.path.join(path_root, data_path, textgrid_folder, sub_folder, filename + ".textgrid")
 27 |
 28 |         print("Parse textgrid file {}".format(textgrid_filename))
 29 |
 30 |         nested_syllable_list, is_file_exist, is_syllable_found = \
 31 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 32 |                                      parent_tier=longsyllable_tier,
 33 |                                      child_tier=syllable_tier)
 34 |
 35 |         nested_special_list, is_file_exist, is_special_found = \
 36 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 37 |                                      parent_tier=longsyllable_tier,
 38 |                                      child_tier=special_tier)
 39 |
 40 |         nested_specialClass_list, is_file_exist, is_specialClass_found = \
 41 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 42 |                                      parent_tier=longsyllable_tier,
 43 |                                      child_tier=special_class_tier)
 44 |
 45 |         log_mel = get_log_mel_madmom(audio_fn=wav_filename,
 46 |                                      fs=fs,
 47 |                                      hopsize_t=hopsize_t,
 48 |                                      channel=1,
 49 |                                      context=False)
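        # The loop below pairs each line of the special tier with the corresponding
        # syllable and special-class tiers. Syllables whose special_class is "1"
        # (shangkou) or "2" (jiantuan) are collected as positive (special
        # pronunciation) examples; syllables whose special_class is not a digit
        # are treated as normal pronunciations and kept as negative examples when
        # they appear in the normal special/jianzi lists.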
for ii_line in range(len(nested_special_list)): 51 | line_special_list = nested_special_list[ii_line] 52 | if line_special_list[0][2] != "1": 53 | line_syllable_list = nested_syllable_list[ii_line] 54 | line_specialClass_list = nested_specialClass_list[ii_line] 55 | 56 | for ii_syl in range(len(line_specialClass_list[1])): 57 | special_class = line_specialClass_list[1][ii_syl][2] 58 | try: 59 | syllable = line_syllable_list[1][ii_syl][2] 60 | except IndexError: 61 | raise IndexError(rec, ii_line) 62 | 63 | if special_class == "1" or special_class == "2": 64 | label_special = line_special_list[1][ii_syl][2] 65 | onset = line_special_list[1][ii_syl][0] 66 | offset = line_special_list[1][ii_syl][1] 67 | sf = int(round(onset * fs / float(hopsize))) # starting frame 68 | ef = int(round(offset * fs / float(hopsize))) # ending frame 69 | log_mel_syllable = log_mel[sf:ef, :] 70 | 71 | if len(log_mel_syllable): 72 | if special_class == "1": # shangkou 73 | num_special += 1 74 | if label_special in dic_syllable_special: 75 | dic_syllable_special[label_special].append(log_mel_syllable) 76 | else: 77 | dic_syllable_special[label_special] = [log_mel_syllable] 78 | if special_class == "2": # jiantuan 79 | num_jianzi += 1 80 | if label_special in dic_syllable_jianzi: 81 | dic_syllable_jianzi[label_special].append(log_mel_syllable) 82 | else: 83 | dic_syllable_jianzi[label_special] = [log_mel_syllable] 84 | 85 | elif not special_class.isdigit(): 86 | onset = line_syllable_list[1][ii_syl][0] 87 | offset = line_syllable_list[1][ii_syl][1] 88 | sf = int(round(onset * fs / float(hopsize))) # starting frame 89 | ef = int(round(offset * fs / float(hopsize))) # ending frame 90 | log_mel_syllable = log_mel[sf:ef, :] 91 | 92 | if len(log_mel_syllable): 93 | if syllable in list_normal_special: 94 | if syllable in dic_syllable_special_normal: 95 | dic_syllable_special_normal[syllable].append(log_mel_syllable) 96 | else: 97 | dic_syllable_special_normal[syllable] = [log_mel_syllable] 98 | if syllable in list_normal_jianzi: 99 | if syllable in dic_syllable_jianzi_normal: 100 | dic_syllable_jianzi_normal[syllable].append(log_mel_syllable) 101 | else: 102 | dic_syllable_jianzi_normal[syllable] = [log_mel_syllable] 103 | else: 104 | pass 105 | 106 | return dic_syllable_special, dic_syllable_jianzi, dic_syllable_special_normal, dic_syllable_jianzi_normal 107 | 108 | 109 | if __name__ == "__main__": 110 | 111 | with open(filename_normal_special, "r") as f: 112 | list_normal_special = json.load(f) 113 | with open(filename_normal_jianzi, "r") as f: 114 | list_normal_jianzi = json.load(f) 115 | 116 | dic_syllable_special, dic_syllable_jianzi, dic_syllable_special_normal, dic_syllable_jianzi_normal = \ 117 | dump_feature_syllable(recordings=recordings_train, 118 | list_normal_special=list_normal_special, 119 | list_normal_jianzi=list_normal_jianzi) 120 | 121 | with open(dict_special_positive, "wb") as f: 122 | pickle.dump(dic_syllable_special, f) 123 | 124 | with open(dict_special_negative, "wb") as f: 125 | pickle.dump(dic_syllable_special_normal, f) 126 | 127 | with open(dict_jianzi_positive, "wb") as f: 128 | pickle.dump(dic_syllable_jianzi, f) 129 | 130 | with open(dict_jianzi_negative, "wb") as f: 131 | pickle.dump(dic_syllable_jianzi_normal, f) 132 | -------------------------------------------------------------------------------- /neural_net/training_scripts/models_RNN.py: -------------------------------------------------------------------------------- 1 | from keras.models import Input 2 | from keras.models import Model 
3 | from keras.models import load_model 4 | from keras.layers import Dropout 5 | from keras.layers import LSTM 6 | from keras.layers import CuDNNLSTM 7 | from keras.layers import Bidirectional 8 | from keras.layers import Dense 9 | from keras.layers import Conv1D 10 | from keras.layers import Conv2D 11 | from keras.layers import Dot 12 | from keras.layers import Lambda 13 | from keras.layers import MaxPooling2D 14 | from keras.layers import Reshape 15 | from keras import backend as K 16 | from keras.callbacks import EarlyStopping 17 | from keras.callbacks import CSVLogger 18 | from keras.callbacks import ModelCheckpoint 19 | from keras.activations import softmax 20 | from tensorflow.python.client import device_lib 21 | 22 | import os 23 | import sys 24 | import numpy as np 25 | from sklearn.metrics import log_loss 26 | from sklearn.metrics import accuracy_score 27 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__)))) 28 | from neural_net.training_scripts.attention import Attention 29 | from neural_net.training_scripts.generator import generator_batch1 30 | 31 | 32 | def conv_module(conv, input_shape, input): 33 | if conv: 34 | x = Reshape((-1, input_shape[2]) + (1,))(input) 35 | x = Conv2D(filters=8, kernel_size=(1, 3), activation="relu")(x) 36 | x = MaxPooling2D(pool_size=(1, 3))(x) 37 | 38 | x = Conv2D(filters=16, kernel_size=(1, 3), activation="relu")(x) 39 | x = MaxPooling2D(pool_size=(1, 3))(x) 40 | shape = K.int_shape(x) 41 | x = Reshape((-1, shape[2] * shape[3]))(x) 42 | else: 43 | x = input 44 | return x 45 | 46 | 47 | def embedding_RNN_1_lstm(input_shape, conv=False, dropout=False, att=False): 48 | 49 | device = device_lib.list_local_devices()[0].device_type 50 | 51 | input = Input(batch_shape=input_shape) 52 | 53 | x = conv_module(conv, input_shape, input) 54 | 55 | if att: 56 | return_sequence = True 57 | else: 58 | return_sequence = False 59 | 60 | if device == 'CPU': 61 | if dropout: 62 | x = Bidirectional(LSTM(units=8, return_sequences=return_sequence, dropout=dropout))(x) 63 | x = Dropout(dropout)(x) 64 | else: 65 | x = Bidirectional(LSTM(units=8, return_sequences=return_sequence))(x) 66 | else: 67 | x = Bidirectional(CuDNNLSTM(units=8, return_sequences=return_sequence))(x) 68 | 69 | if att == "feedforward": 70 | print(K.shape(x)) 71 | x, attention = Attention(return_attention=True)(x) 72 | elif att == "selfatt": 73 | attention = Conv1D(filters=16, kernel_size=1, activation='tanh', padding='same', use_bias=True, 74 | kernel_initializer='glorot_uniform', bias_initializer='zeros', 75 | name="attention_layer1")(x) 76 | attention = Conv1D(filters=16, kernel_size=1, activation='linear', padding='same', 77 | use_bias=True, 78 | kernel_initializer='glorot_uniform', bias_initializer='zeros', 79 | name="attention_layer2")(attention) 80 | attention = Lambda(lambda x: softmax(x, axis=1), name="attention_vector")(attention) 81 | 82 | # Apply attention weights 83 | weighted_sequence_embedding = Dot(axes=[1, 1], normalize=False, name="weighted_sequence_embedding")( 84 | [attention, x]) 85 | 86 | # Add and normalize to obtain final sequence embedding 87 | x = Lambda(lambda x: K.l2_normalize(K.sum(x, axis=1)))(weighted_sequence_embedding) 88 | attention = weighted_sequence_embedding 89 | else: 90 | attention = None 91 | 92 | return x, input, attention 93 | 94 | 95 | def RNN_model_definition(input_shape, 96 | conv, 97 | dropout, 98 | attention, 99 | output_shape): 100 | x, input, att_vector = embedding_RNN_1_lstm(input_shape=input_shape, 101 | conv=conv, 102 | 
dropout=dropout, 103 | att=attention) 104 | 105 | # print("attention shape {}".format(K.shape(att_vector))) 106 | 107 | outputs = [Dense(output_shape, activation='sigmoid')(x), att_vector] 108 | 109 | model = Model(inputs=input, outputs=outputs) 110 | 111 | # model.compile(optimizer='adam', 112 | # loss='binary_crossentropy', 113 | # metrics=['accuracy']) 114 | 115 | return model, input, att_vector 116 | 117 | 118 | def train_RNN_batch(list_feature_fold_train, 119 | labels_fold_train, 120 | list_feature_fold_val, 121 | labels_fold_val, 122 | batch_size, 123 | input_shape, 124 | output_shape, 125 | file_path_model, 126 | filename_log, 127 | epoch, 128 | patience, 129 | scaler, 130 | attention, 131 | conv, 132 | dropout, 133 | summ=False, 134 | verbose=2): 135 | 136 | x, input, att_vector = embedding_RNN_1_lstm(input_shape=input_shape, 137 | conv=conv, 138 | dropout=dropout, 139 | att=attention) 140 | 141 | # print("attention shape {}".format(K.shape(att_vector))) 142 | 143 | outputs = Dense(output_shape, activation='sigmoid')(x) 144 | 145 | model = Model(inputs=input, outputs=outputs) 146 | 147 | model.compile(optimizer='adam', 148 | loss='binary_crossentropy', 149 | metrics=['accuracy']) 150 | 151 | if summ: 152 | model.summary() 153 | 154 | callbacks = [ModelCheckpoint(file_path_model, monitor='val_loss', verbose=0, save_best_only=True), 155 | EarlyStopping(monitor='val_loss', patience=patience, verbose=0), 156 | CSVLogger(filename=filename_log, separator=';')] 157 | 158 | print("start training with validation...") 159 | 160 | generator_train = generator_batch1(list_feature=list_feature_fold_train, 161 | labels=labels_fold_train, 162 | scaler=scaler) 163 | 164 | generator_val = generator_batch1(list_feature=list_feature_fold_val, 165 | labels=labels_fold_val, 166 | scaler=scaler) 167 | 168 | model.fit_generator(generator=generator_train, 169 | steps_per_epoch=len(list_feature_fold_train)/batch_size, 170 | validation_data=generator_val, 171 | validation_steps=len(list_feature_fold_val)/batch_size, 172 | callbacks=callbacks, 173 | epochs=epoch, 174 | verbose=verbose) 175 | 176 | return model 177 | 178 | 179 | def eval_RNN_model(list_feature_test, 180 | labels_test, 181 | file_path_model, 182 | attention, 183 | scaler): 184 | if attention == "feedforward": 185 | model = load_model(filepath=file_path_model, 186 | custom_objects={'Attention': Attention(return_attention=True)}) 187 | elif attention == "selfatt": 188 | model = load_model(filepath=file_path_model, 189 | custom_objects={'softmax': softmax}) 190 | else: 191 | model = load_model(file_path_model) 192 | 193 | list_y_pred = np.zeros((len(labels_test, ))) 194 | for ii in range(len(list_feature_test)): 195 | fea = list_feature_test[ii] 196 | fea = scaler.transform(fea) 197 | fea = np.expand_dims(fea, axis=0) 198 | y_pred = model.predict_on_batch(fea) 199 | list_y_pred[ii] = y_pred[0][0] 200 | 201 | loss_test = log_loss(y_true=labels_test, y_pred=list_y_pred) 202 | 203 | return loss_test -------------------------------------------------------------------------------- /kaldi_alignment/data/train/reco2file_and_channel: -------------------------------------------------------------------------------- 1 | Dan_jingju_a_cappella_singing_dataset_danAll_daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon 1 2 | Dan_jingju_a_cappella_singing_dataset_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm dafeh-Bi_yun_tian-Xi_xiang_ji01-qm 1 3 | Dan_jingju_a_cappella_singing_dataset_danAll_danbz-Bei_jiu_chan-Chun_gui_men01-qm 
danbz-Bei_jiu_chan-Chun_gui_men01-qm 1 4 | Dan_jingju_a_cappella_singing_dataset_danAll_danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm 1 5 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Du_shou_kong-Wang_jiang_ting-upf daspd-Du_shou_kong-Wang_jiang_ting-upf 1 6 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon 1 7 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm 1 8 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Chun_qiu_ting-Suo_lin_nang01-qm daxp-Chun_qiu_ting-Suo_lin_nang01-qm 1 9 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Guan_Shi_yin-Tian_nv_san_hua-lon daxp-Guan_Shi_yin-Tian_nv_san_hua-lon 1 10 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Jiao_Zhang_sheng-Hong_niang01-qm daxp-Jiao_Zhang_sheng-Hong_niang01-qm 1 11 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Jiao_Zhang_sheng-Hong_niang04-qm daxp-Jiao_Zhang_sheng-Hong_niang04-qm 1 12 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf 1 13 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm 1 14 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm 1 15 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Zhe_cai_shi-Suo_lin_nang01-qm daxp-Zhe_cai_shi-Suo_lin_nang01-qm 1 16 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_teacher teacher 1 17 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student01 student01 1 18 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student02 student02 1 19 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student03 student03 1 20 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student04 student04 1 21 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student05 student05 1 22 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student06 student06 1 23 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher teacher 1 24 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student01 student01 1 25 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student02 student02 1 26 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student02_first_half student02_first_half 1 27 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student03 student03 1 28 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student04 student04 1 29 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student05 student05 1 30 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student06 student06 1 31 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher teacher 1 32 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_teacher teacher 1 33 | 
Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_01 student_01 1 34 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_02 student_02 1 35 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_03 student_03 1 36 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_teacher teacher 1 37 | Dan_primary_school_recording_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01 student_01 1 38 | Dan_primary_school_recording_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_teacher teacher 1 39 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_01 student_01 1 40 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_03 student_03 1 41 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_teacher teacher 1 42 | Laosheng_jingju_a_cappella_singing_dataset_extended_nacta2017_20170327LiaoJiaNi_lseh-Niang_zi_bu-Sou_gu_jiu-nacta lseh-Niang_zi_bu-Sou_gu_jiu-nacta 1 43 | Laosheng_jingju_a_cappella_singing_dataset_extended_nacta2017_20170327LiaoJiaNi_lsxp-Yi_ma_li-Wu_jia_po-nacta lsxp-Yi_ma_li-Wu_jia_po-nacta 1 44 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Tan_Yang_jia-Hong_yang_dong-qm lseh-Tan_Yang_jia-Hong_yang_dong-qm 1 45 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wei_guo_jia-Hong_yang_dong01-lon lseh-Wei_guo_jia-Hong_yang_dong01-lon 1 46 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wei_guo_jia-Hong_yang_dong02-qm lseh-Wei_guo_jia-Hong_yang_dong02-qm 1 47 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm lseh-Wo_ben_shi-Qiong_lin_yan-qm 1 48 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Yi_lun_ming-Wen_zhao_guan-qm lseh-Yi_lun_ming-Wen_zhao_guan-qm 1 49 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Zi_na_ri-Hong_yang_dong-qm lseh-Zi_na_ri-Hong_yang_dong-qm 1 50 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Guo_liao_yi-Wen_zhao_guan02-qm lsxp-Guo_liao_yi-Wen_zhao_guan02-qm 1 51 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Huai_nan_wang-Huai_he_ying01-lon lsxp-Huai_nan_wang-Huai_he_ying01-lon 1 52 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Huai_nan_wang-Huai_he_ying02-qm lsxp-Huai_nan_wang-Huai_he_ying02-qm 1 53 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian01-1-upf lsxp-Jiang_shen_er-San_jia_dian01-1-upf 1 54 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian01-2-upf lsxp-Jiang_shen_er-San_jia_dian01-2-upf 1 55 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian02-qm lsxp-Jiang_shen_er-San_jia_dian02-qm 1 56 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm 1 57 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm lsxp-Quan_qian_sui-Gan_lu_si-qm 1 58 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm lsxp-Shi_ye_shuo-Ding_jun_shan-qm 1 59 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_ben_shi-Kong_cheng_ji-qm lsxp-Wo_ben_shi-Kong_cheng_ji-qm 1 60 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf 
lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf 1 61 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm 1 62 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Xi_ri_you-Zhu_lian_zhai-qm lsxp-Xi_ri_you-Zhu_lian_zhai-qm 1 63 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher teacher 1 64 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_student_01 student_01 1 65 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_student_02 student_02 1 66 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher teacher 1 67 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_teacher teacher 1 68 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_student_01 student_01 1 69 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_student_02 student_02 1 70 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher teacher 1 71 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou_student_01_sizhu student_01_sizhu 1 72 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou_teacher teacher 1 73 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_teacher teacher 1 74 | -------------------------------------------------------------------------------- /kaldi_alignment/data/test/wav.scp: -------------------------------------------------------------------------------- 1 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student01.wav 2 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student02.wav 3 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student03.wav 4 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student04.wav 5 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student01.wav 6 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student02.wav 7 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student03 
/Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student03.wav 8 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student04.wav 9 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student05.wav 10 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student06 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student06.wav 11 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student07 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student07.wav 12 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_01.wav 13 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_02.wav 14 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_03.wav 15 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_04.wav 16 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_05.wav 17 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo/student_02.wav 18 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_01.wav 19 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_02.wav 20 | 
Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_03.wav 21 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_04.wav 22 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_05.wav 23 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_06 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_06.wav 24 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_01_dxjky.wav 25 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_sizhu /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_01_sizhu.wav 26 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_02_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_02_dxjky.wav 27 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_03_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_03_dxjky.wav 28 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_04_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_04_dxjky.wav 29 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_05_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_05_dxjky.wav 30 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_06_mentougou /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_06_mentougou.wav 31 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_01.wav 32 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_02 
/Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_02.wav 33 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_04_mentougou /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_04_mentougou.wav 34 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_01.wav 35 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_02.wav 36 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_03.wav 37 | -------------------------------------------------------------------------------- /kaldi_alignment/srcPy/parseLang.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from kaldi_alignment.srcPy.filePath import * 4 | from kaldi_alignment.srcPy.textgridParser import syllableTextgridExtraction 5 | 6 | 7 | def collectLexicon(path_textgrid, recording, tier0, tier1, lexicon): 8 | nestedSyllableLists, numLines, numSyllables = syllableTextgridExtraction(path_textgrid, 9 | recording, 10 | tier0, 11 | tier1) 12 | for syl in nestedSyllableLists: 13 | lexicon_unit = syl[0][2].strip().upper()+' ' 14 | for ii_pho, pho in enumerate(syl[1]): 15 | # if pho[2] == '?': 16 | # continue 17 | if not len(pho[2]) and ii_pho >= len(syl[1])-1: 18 | lexicon_unit = lexicon_unit[:-1] # remove the last space 19 | break 20 | if len(pho[2]): 21 | lexicon_unit += pho[2] 22 | else: 23 | lexicon_unit += 'sil_phone' 24 | if ii_pho != len(syl[1])-1: 25 | lexicon_unit += ' ' # add a space in the end of the char 26 | lexicon.append(lexicon_unit) 27 | return lexicon 28 | 29 | 30 | def collect_lexicon_syllable_special(path_textgrid, recording, syllable_tier, special_tier, phoneme_tier, lexicon): 31 | nestedSyllableLists, numLines, numSyllables = syllableTextgridExtraction(path_textgrid, 32 | recording, 33 | syllable_tier, 34 | phoneme_tier) 35 | 36 | nestedSpecialLists, numLines, numSpecial = syllableTextgridExtraction(path_textgrid, 37 | recording, 38 | special_tier, 39 | phoneme_tier) 40 | for ii_syl, syl in enumerate(nestedSyllableLists): 41 | lexicon_unit = syl[0][2].strip().upper() + ' ' + nestedSpecialLists[ii_syl][0][2].strip().upper() + ' ' 42 | for ii_pho, pho in enumerate(syl[1]): 43 | # if pho[2] == '?': 44 | # continue 45 | if not len(pho[2]) and ii_pho >= len(syl[1])-1: 46 | lexicon_unit = lexicon_unit[:-1] # remove the last space 47 | break 48 | if len(pho[2]): 49 | lexicon_unit += pho[2] 50 | else: 51 | lexicon_unit += 'sil_phone' 52 | if ii_pho != len(syl[1])-1: 53 | lexicon_unit += ' ' # add a space in the end of the char 54 | lexicon.append(lexicon_unit) 55 | return lexicon 56 | 57 | 58 | def organizeRepetition(lexicon, repetition=False): 59 | """ 60 | give the syllable repetition different name 61 | """ 62 | names_syllable = 
{} 63 | lexicon_organized = [] 64 | dict_lexicon_organized = {} 65 | for l in lexicon: 66 | syls = l.split() 67 | 68 | if repetition: 69 | if syls[0] not in names_syllable.keys(): 70 | names_syllable[syls[0]] = 0 71 | else: 72 | names_syllable[syls[0]] += 1 73 | 74 | syls[0] = syls[0]+str(names_syllable[syls[0]]) 75 | 76 | lexicon_unit = ' '.join(syls) 77 | 78 | # remove repetition 79 | if lexicon_unit not in lexicon_organized: 80 | lexicon_organized.append(lexicon_unit) 81 | dict_lexicon_organized[syls[0]] = syls[1:] 82 | return lexicon_organized, dict_lexicon_organized 83 | 84 | 85 | def organize_repetition_syllable_special(lexicon, repetition=False): 86 | """ 87 | give the syllable repetition different name, lexicon contains syllable and special 88 | """ 89 | names_syllable = {} 90 | lexicon_organized = [] 91 | dict_lexicon_organized = {} 92 | 93 | for l in lexicon: 94 | print(l) 95 | syls = l[0].split() 96 | 97 | if repetition: 98 | if syls[0] not in names_syllable.keys(): 99 | names_syllable[syls[0]] = 0 100 | else: 101 | names_syllable[syls[0]] += 1 102 | 103 | syls[0] = syls[0]+str(names_syllable[syls[0]]) 104 | 105 | lexicon_unit = [' '.join(syls), l[1:]] 106 | 107 | # remove repetition 108 | if lexicon_unit[0] not in lexicon_organized: 109 | lexicon_organized.append(lexicon_unit[0]) 110 | dict_lexicon_organized[syls[0]] = [syls[1:], l[1:]] 111 | return lexicon_organized, dict_lexicon_organized 112 | 113 | 114 | def writeLexicon(path_lang, lexicon, repetition=False): 115 | if repetition: 116 | filename_lexicon = 'lexicon.txt' 117 | else: 118 | filename_lexicon = 'lexicon_no_rep.txt' 119 | 120 | with open(os.path.join(path_lang, filename_lexicon), "w") as f: 121 | f.write('SIL sil\n') 122 | # f.write('SIL_PHONE sil_phone\n') 123 | # f.write('SILDAN silDan\n') 124 | f.write(' sil\n') 125 | 126 | for l in lexicon: 127 | f.write(l) 128 | f.write('\n') 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | # train: organize lexicon with repetition, 134 | # test: organize lexicon without repetition. 
135 | train_test = 'test' 136 | 137 | lexicon = [] 138 | 139 | if train_test == 'train': 140 | 141 | for rec in recordings_train+recordings_test: 142 | data_path, sub_folder, textgrid_folder, \ 143 | wav_folder, filename, line_tier, longsyllable_tier, syllable_tier, \ 144 | phoneme_tier, special_tier, special_class_tier, roletype = parse_recordings(rec) 145 | 146 | lexicon = collectLexicon(path_textgrid=os.path.join(path_root, data_path, textgrid_folder, sub_folder), 147 | recording=filename, 148 | tier0=special_tier, 149 | tier1=phoneme_tier, 150 | lexicon=lexicon) 151 | 152 | lexicon = list(set(lexicon)) 153 | 154 | lexicon_organized, dict_lexicon_organized = organizeRepetition(lexicon, repetition=True) 155 | 156 | writeLexicon(path_lang, lexicon_organized, repetition=True) 157 | 158 | with open(os.path.join(path_lang, "dict_lexicon_repetition.json"), "w") as write_file: 159 | json.dump(dict_lexicon_organized, write_file) 160 | else: 161 | lexicon_special = [] 162 | lexicon_syllable_special = [] 163 | 164 | for rec in recordings_train+recordings_test: 165 | data_path, sub_folder, textgrid_folder, \ 166 | wav_folder, filename, line_tier, longsyllable_tier, syllable_tier, \ 167 | phoneme_tier, special_tier, special_class_tier, roletype = parse_recordings(rec) 168 | 169 | lexicon = collectLexicon(path_textgrid=os.path.join(path_root, data_path, textgrid_folder, sub_folder), 170 | recording=filename, 171 | tier0=syllable_tier, 172 | tier1=phoneme_tier, 173 | lexicon=lexicon) 174 | 175 | lexicon_special = collectLexicon( 176 | path_textgrid=os.path.join(path_root, data_path, textgrid_folder, sub_folder), 177 | recording=filename, 178 | tier0=special_tier, 179 | tier1=phoneme_tier, 180 | lexicon=lexicon_special) 181 | 182 | lexicon_syllable_special = collect_lexicon_syllable_special(path_textgrid=os.path.join(path_root, data_path, textgrid_folder, sub_folder), 183 | recording=filename, 184 | syllable_tier=syllable_tier, 185 | special_tier=special_tier, 186 | phoneme_tier=phoneme_tier, 187 | lexicon=lexicon_syllable_special) 188 | 189 | lexicon = list(set(lexicon)) 190 | 191 | lexicon_special = list(set(lexicon_special)) 192 | 193 | lexicon_syllable_special = list(set(lexicon_syllable_special)) 194 | 195 | # get a list ['SYL phn0 phn1 phn2', 'SPECIAL'] 196 | lexicon_remove_rep = [] 197 | for pron_special in lexicon_special: 198 | lexicon_unit = [pron_special] 199 | for word_entry in lexicon_syllable_special: 200 | syl = word_entry.split(' ')[0] 201 | if pron_special == ' '.join(word_entry.split(' ')[1:]): 202 | lexicon_unit.append(syl) 203 | lexicon_remove_rep.append(lexicon_unit) 204 | 205 | lexicon_organized, dict_lexicon_organized = organizeRepetition(lexicon, repetition=False) 206 | 207 | # dict_lexicon_organized_syllable_special, {SPECIAL: [[phn0 phn1 phn2], [SYL0 SYL1]]} 208 | lexicon_organized_syllable_special, dict_lexicon_organized_syllable_special = \ 209 | organize_repetition_syllable_special(lexicon_remove_rep, repetition=True) 210 | 211 | with open(os.path.join(path_lang, "dict_lexicon_repetition_syllable_special.json"), "w") as write_file: 212 | json.dump(dict_lexicon_organized_syllable_special, write_file) -------------------------------------------------------------------------------- /neural_net/keras_tcn/README.md: -------------------------------------------------------------------------------- 1 | # Keras TCN 2 | *Keras Temporal Convolutional Network* 3 | 4 | * [Keras TCN](#keras-tcn) 5 | * [Why Temporal Convolutional Network?](#why-temporal-convolutional-network) 6 | * 
[API](#api)
 7 |     * [Regression (Many to one) e.g. adding problem](#--regression-many-to-one-eg-adding-problem)
 8 |     * [Classification (Many to many) e.g. copy memory task](#--classification-many-to-many-eg-copy-memory-task)
 9 |     * [Classification (Many to one) e.g. sequential mnist task](#--classification-many-to-one-eg-sequential-mnist-task)
 10 |   * [Installation](#installation)
 11 |   * [Run](#run)
 12 |   * [Tasks](#tasks)
 13 |   * [References](#references)
 14 |
 15 | ## Why Temporal Convolutional Network?
 16 |
 17 | - TCNs exhibit longer memory than recurrent architectures with the same capacity.
 18 | - They consistently perform better than LSTM/GRU architectures on a vast range of tasks (Seq. MNIST, Adding Problem, Copy Memory, Word-level PTB...).
 19 | - Parallelism, flexible receptive field size, stable gradients, low memory requirements for training, variable length inputs...
 20 |
 21 |

 22 | *(figure: misc/Dilated_Conv.png)*
 23 | Visualization of a stack of dilated causal convolutional layers (Wavenet, 2016)
 24 |

 25 |
 26 | ## API
 27 |
 28 | After installation, the model can be imported like this:
 29 |
 30 | ```
 31 | from tcn import tcn
 32 | ```
 33 |
 34 | In the following examples, we assume the input to have a shape `(batch_size, timesteps, input_dim)`.
 35 |
 36 | The model is a Keras model, so the usual model methods (`model.summary`, `model.fit`, `model.predict`...) all work as expected.
 37 |
 38 |
 39 |
 40 | ### - Regression (Many to one) e.g. adding problem
 41 |
 42 | ```
 43 | model = tcn.dilated_tcn(output_slice_index='last',
 44 |                         num_feat=input_dim,
 45 |                         num_classes=None,
 46 |                         nb_filters=24,
 47 |                         kernel_size=8,
 48 |                         dilatations=[1, 2, 4, 8],
 49 |                         nb_stacks=8,
 50 |                         max_len=timesteps,
 51 |                         activation='norm_relu',
 52 |                         regression=True)
 53 | ```
 54 |
 55 | For a Many to Many regression, a cheap fix for now is to change the [number of units of the final Dense layer](https://github.com/philipperemy/keras-tcn/blob/8151b4a87f906fd856fd1c113c48392d542d0994/tcn/tcn.py#L90).
 56 |
 57 | ### - Classification (Many to many) e.g. copy memory task
 58 |
 59 | ```
 60 | model = tcn.dilated_tcn(num_feat=input_dim,
 61 |                         num_classes=10,
 62 |                         nb_filters=10,
 63 |                         kernel_size=8,
 64 |                         dilatations=[1, 2, 4, 8],
 65 |                         nb_stacks=8,
 66 |                         max_len=timesteps,
 67 |                         activation='norm_relu')
 68 | ```
 69 |
 70 | ### - Classification (Many to one) e.g. sequential mnist task
 71 |
 72 | ```
 73 | model = tcn.dilated_tcn(output_slice_index='last',
 74 |                         num_feat=input_dim,
 75 |                         num_classes=10,
 76 |                         nb_filters=64,
 77 |                         kernel_size=8,
 78 |                         dilatations=[1, 2, 4, 8],
 79 |                         nb_stacks=8,
 80 |                         max_len=timesteps,
 81 |                         activation='norm_relu')
 82 | ```
 83 |
 84 | ## Installation
 85 |
 86 | ```
 87 | git clone git@github.com:philipperemy/keras-tcn.git
 88 | cd keras-tcn
 89 | virtualenv -p python3.6 venv
 90 | source venv/bin/activate
 91 | pip install -r requirements.txt # change to tensorflow if you don't have a GPU.
 92 | python setup.py install # install keras-tcn as a package
 93 | ```
 94 |
 95 | ## Run
 96 |
 97 | Once `keras-tcn` is installed as a package, you can get a glimpse of what TCNs can do. Several example tasks are available in the repository:
 98 |
 99 | ```
 100 | cd adding_problem/
 101 | python main.py # run adding problem task
 102 |
 103 | cd copy_memory/
 104 | python main.py # run copy memory task
 105 |
 106 | cd mnist_pixel/
 107 | python main.py # run sequential mnist pixel task
 108 | ```
 109 |
 110 | ## Tasks
 111 |
 112 | ### Adding Task
 113 |
 114 | The task consists of feeding a large array of decimal numbers to the network, along with a boolean array of the same length. The objective is to sum the two decimals at the positions where the boolean array contains the two 1s (a data-generation sketch is given below).
 115 |
 116 | #### Explanation
 117 |
 118 |
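To make the input format concrete, here is a minimal NumPy sketch of how such input/target pairs could be generated. The helper name `gen_adding_data` is an illustrative assumption; the repository's own generator lives in `adding_problem/utils.py`.

```
import numpy as np

def gen_adding_data(n_samples, timesteps):
    # hypothetical helper, for illustration only
    values = np.random.uniform(0, 1, size=(n_samples, timesteps))  # the decimal numbers
    markers = np.zeros((n_samples, timesteps))                     # the boolean array
    for i in range(n_samples):
        idx = np.random.choice(timesteps, size=2, replace=False)  # exactly two 1s
        markers[i, idx] = 1.0
    x = np.stack([values, markers], axis=-1)             # shape (n_samples, timesteps, 2)
    y = np.sum(values * markers, axis=1, keepdims=True)  # sum of the two marked decimals
    return x, y
```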

 119 | *(figure: misc/Adding_Task.png)*
 120 | Adding Problem Task
 121 |

 122 |
 123 | #### Implementation results
 124 |
 125 | The model takes time to learn this task. This shows up as a very long loss plateau (which can last ~8 epochs on some runs).
 126 |
 127 | ```
 128 | 200000/200000 [==============================] - 451s 2ms/step - loss: 0.1749 - val_loss: 0.1662
 129 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1681 - val_loss: 0.1676
 130 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1677 - val_loss: 0.1663
 131 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1676 - val_loss: 0.1652
 132 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1165 - val_loss: 0.0093
 133 | 200000/200000 [==============================] - 448s 2ms/step - loss: 0.0083 - val_loss: 0.0033
 134 | 200000/200000 [==============================] - 448s 2ms/step - loss: 0.0040 - val_loss: 0.0012
 135 | ```
 136 |
 137 | ### Copy Memory Task
 138 |
 139 | The copy memory task consists of a very large array:
 140 | - At the beginning, there's the vector x of length N. This is the vector to copy.
 141 | - At the end, N+1 9s are present. The first 9 is seen as a delimiter.
 142 | - The middle is filled with 0s only.
 143 |
 144 | The idea is to copy the content of the vector x to the end of the large array. The task is made sufficiently complex by increasing the number of 0s in the middle (a sketch of this layout is given below).
 145 |
 146 | #### Explanation
 147 |
 148 |
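As a rough illustration of that layout, here is a sketch of a generator. The helper name `gen_copy_data` and the default lengths are assumptions; the repository's own generator lives in `copy_memory/utils.py`.

```
import numpy as np

def gen_copy_data(n_samples, n=10, n_zeros=100):
    # hypothetical sketch: digits 1..8 form the vector x to copy
    to_copy = np.random.randint(1, 9, size=(n_samples, n))
    blanks = np.zeros((n_samples, n_zeros))  # the long middle of 0s
    nines = np.full((n_samples, n + 1), 9)   # N+1 9s; the first one is the delimiter
    x = np.concatenate([to_copy, blanks, nines], axis=1)
    # target: zeros everywhere except the last N positions, which repeat x
    y = np.concatenate([np.zeros((n_samples, n_zeros + n + 1)), to_copy], axis=1)
    return x, y
```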

 149 | *(figure: misc/Copy_Memory_Task.png)*
 150 | Copy Memory Task
 151 |

 152 |
 153 | #### Implementation results
 154 |
 155 | ```
 156 | 10000/10000 [==============================] - 20s 2ms/step - loss: 0.3474 - acc: 0.8985 - val_loss: 0.0362 - val_acc: 0.9859
 157 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0360 - acc: 0.9859 - val_loss: 0.0353 - val_acc: 0.9859
 158 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0351 - acc: 0.9859 - val_loss: 0.0345 - val_acc: 0.9859
 159 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0342 - acc: 0.9860 - val_loss: 0.0336 - val_acc: 0.9860
 160 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0332 - acc: 0.9865 - val_loss: 0.0307 - val_acc: 0.9883
 161 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0240 - acc: 0.9898 - val_loss: 0.0157 - val_acc: 0.9933
 162 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0136 - acc: 0.9951 - val_loss: 0.0094 - val_acc: 0.9976
 163 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0087 - acc: 0.9978 - val_loss: 0.0049 - val_acc: 1.0000
 164 | 10000/10000 [==============================] - 14s 1ms/step - loss: 0.0050 - acc: 0.9992 - val_loss: 0.0020 - val_acc: 1.0000
 165 | ```
 166 |
 167 | ### Sequential MNIST
 168 |
 169 | #### Explanation
 170 |
 171 | The idea here is to consider MNIST images as 1-D sequences and feed them to the network. This task is particularly hard because the sequences are 28*28 = 784 elements long. In order to classify correctly, the network has to remember the entire sequence. Standard LSTMs are unable to perform well on this task.
 172 |
 173 |
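The reshaping itself is straightforward. Here is a sketch using `keras.datasets.mnist`; the example's own loader lives in `mnist_pixel/utils.py`, so this is only an illustration.

```
from keras.datasets import mnist
import numpy as np

# flatten each 28x28 image into a 784-step sequence with one feature per step
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784, 1).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 784, 1).astype(np.float32) / 255.0
```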

![Sequential MNIST](misc/Sequential_MNIST_Task.png)

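The preprocessing can be sketched as follows: each 28x28 image is flattened into a 784-step sequence with one feature per step, matching the `(batch_size, timesteps, input_dim)` convention. This sketch uses Keras's bundled MNIST loader and is illustrative; the repository's `mnist_pixel/utils.py` may differ in details.

```python
from keras.datasets import mnist

# Flatten each 28x28 image into a 784-step, single-feature sequence
# and scale pixel values to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784, 1).astype('float32') / 255.0
```

Depending on the loss used, the integer labels may additionally need one-hot encoding (e.g. via `keras.utils.to_categorical`) before calling `model.fit`.
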
#### Implementation results

```
60000/60000 [==============================] - 569s 9ms/step - loss: 0.2209 - acc: 0.9303 - val_loss: 0.0699 - val_acc: 0.9781
60000/60000 [==============================] - 545s 9ms/step - loss: 0.0784 - acc: 0.9760 - val_loss: 0.0507 - val_acc: 0.9843
60000/60000 [==============================] - 553s 9ms/step - loss: 0.0599 - acc: 0.9824 - val_loss: 0.0512 - val_acc: 0.9840
60000/60000 [==============================] - 555s 9ms/step - loss: 0.0493 - acc: 0.9851 - val_loss: 0.0569 - val_acc: 0.9824
60000/60000 [==============================] - 549s 9ms/step - loss: 0.0421 - acc: 0.9868 - val_loss: 0.0424 - val_acc: 0.9864
60000/60000 [==============================] - 558s 9ms/step - loss: 0.0358 - acc: 0.9886 - val_loss: 0.0416 - val_acc: 0.9874
60000/60000 [==============================] - 536s 9ms/step - loss: 0.0317 - acc: 0.9901 - val_loss: 0.0566 - val_acc: 0.9835
60000/60000 [==============================] - 483s 8ms/step - loss: 0.0272 - acc: 0.9915 - val_loss: 0.0565 - val_acc: 0.9845
60000/60000 [==============================] - 489s 8ms/step - loss: 0.0278 - acc: 0.9915 - val_loss: 0.0421 - val_acc: 0.9874
60000/60000 [==============================] - 483s 8ms/step - loss: 0.0227 - acc: 0.9929 - val_loss: 0.0464 - val_acc: 0.9882
60000/60000 [==============================] - 484s 8ms/step - loss: 0.0203 - acc: 0.9935 - val_loss: 0.0428 - val_acc: 0.9890
60000/60000 [==============================] - 484s 8ms/step - loss: 0.0212 - acc: 0.9934 - val_loss: 0.0539 - val_acc: 0.9884
60000/60000 [==============================] - 483s 8ms/step - loss: 0.0167 - acc: 0.9947 - val_loss: 0.0393 - val_acc: 0.9900
```

## References
- https://github.com/locuslab/TCN/ (TCN for PyTorch)
- https://arxiv.org/pdf/1803.01271.pdf (An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling)
- https://arxiv.org/pdf/1609.03499.pdf (original WaveNet paper)
--------------------------------------------------------------------------------
/kaldi_alignment/data/dev/spk2utt:
--------------------------------------------------------------------------------
1 | Dan Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_000 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_001 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_002 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_003 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_004 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_005 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_006 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_007 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_008 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_000 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_001 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_002 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_003 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_004 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_005 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_006 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_007
Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_008 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_009 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_010 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_011 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_012 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_013 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_014 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_015 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_000 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_001 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_002 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_003 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_004 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_005 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_000 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_001 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_002 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_003 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_004 Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm_000 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_000 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_001 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_002 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_003 Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm_000 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_000 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_001 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_002 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_003 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_004 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_005 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_006 Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_000 Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_001 Dan_danAll_xixiangji_luanchouduo_000 Dan_danAll_xixiangji_luanchouduo_001 Dan_danAll_xixiangji_luanchouduo_002 Dan_danAll_xixiangji_luanchouduo_003 Dan_danAll_yutangchun_yutangchun_000 Dan_danAll_yutangchun_yutangchun_001 Dan_danAll_yutangchun_yutangchun_002 Dan_danAll_yutangchun_yutangchun_003 Dan_danAll_yutangchun_yutangchun_004 Dan_danAll_yutangchun_yutangchun_005 Dan_danAll_yutangchun_yutangchun_006 Dan_danAll_yutangchun_yutangchun_007 Dan_danAll_zhuangyuanmei_daocishi_000 Dan_danAll_zhuangyuanmei_daocishi_001 Dan_danAll_zhuangyuanmei_daocishi_002 Dan_danAll_zhuangyuanmei_daocishi_003 Dan_danAll_zhuangyuanmei_daocishi_004 Dan_danAll_zhuangyuanmei_daocishi_005 Dan_danAll_zhuangyuanmei_daocishi_006 Dan_danAll_zhuangyuanmei_daocishi_007 Dan_danAll_zhuangyuanmei_daocishi_008 Dan_danAll_zhuangyuanmei_daocishi_009 2 | Laosheng Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_000 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_001 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_002 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_003 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_004 
Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_005 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_006 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_007 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_008 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_009 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_010 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_000 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_001 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_002 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_003 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_004 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_005 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_006 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_007 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_008 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_009 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_010 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_000 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_001 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_002 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_003 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_004 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_005 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_006 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_007 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_008 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_009 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_010 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_011 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_000 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_001 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_002 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_003 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_004 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_005 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_000 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_001 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_002 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_003 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_004 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_005 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_006 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_007 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_008 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_009 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_010 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_011 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_012 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_000 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_001 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_002 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_003 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_004 
Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_005 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_006 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_007 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_008 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_009 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_010 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_011 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_012 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_013 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_014 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_015 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_016 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_017 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_018 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_019 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_020 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_021 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_022 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_023 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_024 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_025 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_000 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_001 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_002 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_003 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_004 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_005 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_006 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_007 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_008 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_009 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_010 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_011 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_012 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_013 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_014 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_015 3 | -------------------------------------------------------------------------------- /kaldi_alignment/data/dev/utt2spk: -------------------------------------------------------------------------------- 1 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_000 Dan 2 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_001 Dan 3 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_002 Dan 4 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_003 Dan 5 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_004 Dan 6 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_005 Dan 7 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_006 Dan 8 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_007 Dan 9 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_008 Dan 10 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_000 Dan 11 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_001 Dan 12 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_002 Dan 13 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_003 Dan 14 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_004 Dan 15 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_005 Dan 16 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_006 Dan 17 | 
Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_007 Dan 18 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_008 Dan 19 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_009 Dan 20 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_010 Dan 21 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_011 Dan 22 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_012 Dan 23 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_013 Dan 24 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_014 Dan 25 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_015 Dan 26 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_000 Dan 27 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_001 Dan 28 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_002 Dan 29 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_003 Dan 30 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_004 Dan 31 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_005 Dan 32 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_000 Dan 33 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_001 Dan 34 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_002 Dan 35 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_003 Dan 36 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_004 Dan 37 | Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm_000 Dan 38 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_000 Dan 39 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_001 Dan 40 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_002 Dan 41 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_003 Dan 42 | Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm_000 Dan 43 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_000 Dan 44 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_001 Dan 45 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_002 Dan 46 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_003 Dan 47 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_004 Dan 48 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_005 Dan 49 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_006 Dan 50 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_000 Dan 51 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_001 Dan 52 | Dan_danAll_xixiangji_luanchouduo_000 Dan 53 | Dan_danAll_xixiangji_luanchouduo_001 Dan 54 | Dan_danAll_xixiangji_luanchouduo_002 Dan 55 | Dan_danAll_xixiangji_luanchouduo_003 Dan 56 | Dan_danAll_yutangchun_yutangchun_000 Dan 57 | Dan_danAll_yutangchun_yutangchun_001 Dan 58 | Dan_danAll_yutangchun_yutangchun_002 Dan 59 | Dan_danAll_yutangchun_yutangchun_003 Dan 60 | Dan_danAll_yutangchun_yutangchun_004 Dan 61 | Dan_danAll_yutangchun_yutangchun_005 Dan 62 | Dan_danAll_yutangchun_yutangchun_006 Dan 63 | Dan_danAll_yutangchun_yutangchun_007 Dan 64 | Dan_danAll_zhuangyuanmei_daocishi_000 Dan 65 | Dan_danAll_zhuangyuanmei_daocishi_001 Dan 66 | Dan_danAll_zhuangyuanmei_daocishi_002 Dan 67 | Dan_danAll_zhuangyuanmei_daocishi_003 Dan 68 | Dan_danAll_zhuangyuanmei_daocishi_004 Dan 69 | Dan_danAll_zhuangyuanmei_daocishi_005 Dan 70 | Dan_danAll_zhuangyuanmei_daocishi_006 Dan 71 | 
Dan_danAll_zhuangyuanmei_daocishi_007 Dan 72 | Dan_danAll_zhuangyuanmei_daocishi_008 Dan 73 | Dan_danAll_zhuangyuanmei_daocishi_009 Dan 74 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_000 Laosheng 75 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_001 Laosheng 76 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_002 Laosheng 77 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_003 Laosheng 78 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_004 Laosheng 79 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_005 Laosheng 80 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_006 Laosheng 81 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_007 Laosheng 82 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_008 Laosheng 83 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_009 Laosheng 84 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_010 Laosheng 85 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_000 Laosheng 86 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_001 Laosheng 87 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_002 Laosheng 88 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_003 Laosheng 89 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_004 Laosheng 90 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_005 Laosheng 91 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_006 Laosheng 92 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_007 Laosheng 93 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_008 Laosheng 94 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_009 Laosheng 95 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_010 Laosheng 96 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_000 Laosheng 97 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_001 Laosheng 98 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_002 Laosheng 99 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_003 Laosheng 100 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_004 Laosheng 101 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_005 Laosheng 102 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_006 Laosheng 103 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_007 Laosheng 104 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_008 Laosheng 105 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_009 Laosheng 106 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_010 Laosheng 107 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_011 Laosheng 108 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_000 Laosheng 109 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_001 Laosheng 110 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_002 Laosheng 111 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_003 Laosheng 112 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_004 Laosheng 
113 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_005 Laosheng 114 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_000 Laosheng 115 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_001 Laosheng 116 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_002 Laosheng 117 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_003 Laosheng 118 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_004 Laosheng 119 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_005 Laosheng 120 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_006 Laosheng 121 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_007 Laosheng 122 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_008 Laosheng 123 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_009 Laosheng 124 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_010 Laosheng 125 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_011 Laosheng 126 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_012 Laosheng 127 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_000 Laosheng 128 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_001 Laosheng 129 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_002 Laosheng 130 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_003 Laosheng 131 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_004 Laosheng 132 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_005 Laosheng 133 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_006 Laosheng 134 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_007 Laosheng 135 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_008 Laosheng 136 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_009 Laosheng 137 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_010 Laosheng 138 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_011 Laosheng 139 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_012 Laosheng 140 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_013 Laosheng 141 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_014 Laosheng 142 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_015 Laosheng 143 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_016 Laosheng 144 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_017 Laosheng 145 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_018 Laosheng 146 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_019 Laosheng 147 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_020 Laosheng 148 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_021 Laosheng 149 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_022 Laosheng 150 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_023 Laosheng 151 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_024 Laosheng 152 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_025 Laosheng 153 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_000 Laosheng 154 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_001 Laosheng 155 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_002 Laosheng 156 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_003 Laosheng 157 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_004 Laosheng 158 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_005 Laosheng 159 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_006 Laosheng 160 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_007 Laosheng 161 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_008 Laosheng 162 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_009 
Laosheng 163 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_010 Laosheng 164 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_011 Laosheng 165 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_012 Laosheng 166 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_013 Laosheng 167 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_014 Laosheng 168 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_015 Laosheng 169 | --------------------------------------------------------------------------------