├── .gitignore
├── .idea
    ├── codeStyles
    │   └── codeStyleConfig.xml
    ├── dictionaries
    │   └── ronggong.xml
    ├── inspectionProfiles
    │   └── Project_Default.xml
    └── libraries
    │   └── R_User_Library.xml
├── LICENSE
├── README.md
├── kaldi_alignment
    ├── ali2Phones.sh
    ├── cmd.sh
    ├── conf
    │   └── mfcc.conf
    ├── createtextgridDan.praat
    ├── createtextgridLaosheng.praat
    ├── data
    │   ├── dev
    │   │   ├── char.stm
    │   │   ├── phone.txt
    │   │   ├── reco2file_and_channel
    │   │   ├── segments
    │   │   ├── spk2utt
    │   │   ├── stm
    │   │   ├── text
    │   │   ├── utt2spk
    │   │   └── wav.scp
    │   ├── dict
    │   │   ├── dict_lexicon_repetition.json
    │   │   ├── dict_lexicon_repetition_syllable_special.json
    │   │   ├── lexicon.txt
    │   │   ├── lexicon_no_rep.txt
    │   │   ├── lexiconp.txt
    │   │   ├── nonsilence_phones.txt
    │   │   ├── optional_silence.txt
    │   │   └── silence_phones.txt
    │   ├── dict_test
    │   │   ├── lexicon.txt
    │   │   ├── nonsilence_phones.txt
    │   │   ├── optional_silence.txt
    │   │   └── silence_phones.txt
    │   ├── test
    │   │   ├── char.stm
    │   │   ├── phone.txt
    │   │   ├── reco2file_and_channel
    │   │   ├── segments
    │   │   ├── spk2utt
    │   │   ├── stm
    │   │   ├── text
    │   │   ├── text_student
    │   │   ├── text_teacher
    │   │   ├── utt2spk
    │   │   └── wav.scp
    │   └── train
    │   │   ├── char.stm
    │   │   ├── phone.txt
    │   │   ├── reco2file_and_channel
    │   │   ├── segments
    │   │   ├── spk2utt
    │   │   ├── stm
    │   │   ├── text
    │   │   ├── utt2spk
    │   │   └── wav.scp
    ├── header.txt
    ├── id2phone.R
    ├── path.sh
    ├── run.sh
    ├── splitAlignments.py
    └── srcPy
    │   ├── csv_prepossessing.py
    │   ├── filePath.py
    │   ├── mispron_eval.py
    │   ├── mispronunciation_filelist_test.csv
    │   ├── mispronunciation_filelist_train.csv
    │   ├── parseLang.py
    │   ├── parseTextRepetition.py
    │   ├── parse_decoded_pronunciation.py
    │   ├── textgrid.py
    │   └── textgridParser.py
└── neural_net
    ├── __init__.py
    ├── combine_feature_label.py
    ├── data
        ├── mispronunciation_filelist_test.csv
        ├── mispronunciation_filelist_train.csv
        ├── normal_jianzi.json
        └── normal_special.json
    ├── eval.py
    ├── file_path.py
    ├── keras_tcn
        ├── .gitignore
        ├── LICENSE
        ├── README.md
        ├── __init__.py
        ├── adding_problem
        │   ├── README.md
        │   ├── __init__.py
        │   ├── main.py
        │   └── utils.py
        ├── copy_memory
        │   ├── README.md
        │   ├── __init__.py
        │   ├── main.py
        │   └── utils.py
        ├── misc
        │   ├── Adding_Task.png
        │   ├── Copy_Memory_Task.png
        │   ├── Dilated_Conv.png
        │   └── Sequential_MNIST_Task.png
        ├── mnist_pixel
        │   ├── __init__.py
        │   ├── main.py
        │   └── utils.py
        ├── setup.py
        └── tcn
        │   ├── __init__.py
        │   └── tcn.py
    ├── model
        └── segmentation
        │   ├── jan_joint0.h5
        │   └── scaler_joint.pkl
    ├── normal_pronunciation.py
    ├── onsetSegmentEval
        ├── __init__.py
        ├── evaluation.py
        ├── phonemeMap.py
        └── runEval.py
    ├── parameters.py
    ├── plot_code.py
    ├── training_sample_collection_syllable.py
    ├── training_scripts
        ├── attention.py
        ├── generator.py
        ├── hpc_code
        │   ├── train_run_jianzi.py
        │   ├── train_run_jianzi_tcn.py
        │   ├── train_run_special.py
        │   └── train_run_special_tcn.py
        ├── models_RNN.py
        └── models_TCN.py
    ├── utils
        ├── audio_preprocessing.py
        ├── csv_preprocessing.py
        ├── textgrid.py
        ├── textgridParser.py
        ├── textgrid_preprocessing.py
        └── utils_functions.py
    └── viterbiDecodingPhonemeSeg.pyx


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 | <component name="ProjectCodeStyleConfiguration">
2 |   <state>
3 |     <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
4 |   </state>
5 | </component>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
 1 | <component name="InspectionProjectProfileManager">
 2 |   <profile version="1.0">
 3 |     <option name="myName" value="Project Default" />
 4 |     <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
 5 |       <option name="ignoredErrors">
 6 |         <list>
 7 |           <option value="W191" />
 8 |           <option value="E101" />
 9 |         </list>
10 |       </option>
11 |     </inspection_tool>
12 |   </profile>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/R_User_Library.xml:
--------------------------------------------------------------------------------
1 | <component name="libraryTable">
2 |   <library name="R User Library">
3 |     <CLASSES />
4 |     <SOURCES />
5 |   </library>
6 | </component>


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mispronunciation detection
 2 | Mispronunciation detection code for jingju singing voice.
 3 | 
 4 | Rong Gong's thesis "Automatic assessment of singing voice pronunciation: 
 5 | a case study with jingju music" chapter 6.
 6 | 
 7 | This repo contains two methods:
 8 | 
 9 | * Baseline - forced alignment system built on Kaldi.
10 | * Deep learning - discriminative model built using Keras and Tensorflow.
11 | 
12 | ## Baseline
 13 | The main idea of the forced alignment-based mispronunciation detection is to use
14 | two lexicons respectively for training and testing phases. The detail of this
15 | idea is described in section 6.2.1 in the thesis.
16 | 
17 | We here only explain the general pipeline of the model training and testing. Please
18 | write to the author if you want to know how to use the code for your own
19 | experiment. Pipeline:
20 | 
21 | 1. generate language dictionary by using `srcPy/parseLang.py`.
 22 | 2. generate all the files that Kaldi needs, e.g., text, wav.scp, phone.txt, by
23 | `srcPy/parseTextRepetition.py`.
24 | 3. run the model training and decode the text for test data by `run.sh`
25 | 4. parse decoded pronunciation by `srcPy/parse_decoded_pronunciation.py`
 26 | 5. evaluate the results with `srcPy/mispron_eval.py`
27 | 
28 | ## Deep learning-based discriminative model
29 | 
30 | We built discriminative models for mispronunciation detection. Two types of model
31 | are built, one for special pronunciation, another for jiantuanzi syllables. We have
 32 | experimented with several deep learning architectures, such as BiLSTM, CNN, attention, 
33 | Temporal convolutional networks (TCNs), self-attention. The details are described in 
34 | sections 6.3 and 6.4 in the thesis. Here, we also only describe the pipeline of model 
 35 | training and testing. Please write to the author if you want to use the code for your own
36 | experiment. Pipeline:
37 | 
38 | 1. collecting training logarithmic Mel representation by `training_sample_collection_syllable.py`
 39 | 2. train various deep learning architectures by using `training_scripts/hpc_code/train_run_special.py` or `training_scripts/hpc_code/train_run_jianzi.py` respectively for
 40 | special pronunciation and jiantuanzi models. For example, the attention var can be `feedforward` or `selfatt`.
 41 | 3. train TCN architectures by using `training_scripts/hpc_code/train_run_special_tcn.py` and `training_scripts/hpc_code/train_run_jianzi_tcn.py`
42 | 4. evaluation by `eval.py`
43 | 
44 | ## Contact
45 | Rong Gong: rong.gong\<at\>upf.edu
46 | 
47 | ## Code license
48 | GNU Affero General Public License 3.0
49 | 


--------------------------------------------------------------------------------
/kaldi_alignment/ali2Phones.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Runs Praat on createtextgridLaosheng.praat; every earlier step below (ali-to-phones, id2phone.R, splitAlignments.py, header insertion, the Dan Praat script) is commented out.
 3 | #for i in exp/mono_ali/ali.*.gz; do
 4 | #	src/bin/ali-to-phones --ctm-output exp/mono_ali/final.mdl ark:"gunzip -c $i|" -> ${i%.gz}.ctm;
 5 | #done;
 6 | #
 7 | #cd exp/mono_ali
 8 | #
 9 | #cat *.ctm > merged_alignment.txt
10 | #
11 | #cd ../..
12 | #
13 | #R -f id2phone.R
14 | #
15 | #python splitAlignments.py
16 | #
17 | #cd ~/Desktop
18 | #rm -rf tmp && mkdir tmp || exit 1;
19 | #
20 | #header="/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/header.txt"
21 | #
22 | ## direct the terminal to the directory with the newly split session files
23 | ## ensure that the RegEx below will capture only the session files
24 | ## otherwise change this or move the other .txt files to a different folder
25 | #
26 | #for x in laosheng dan;do
27 | #	cd /Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/splitAli/$x
28 | #	for i in *.txt; do
29 | #		cat "$header" "$i" > /Users/ronggong/Desktop/tmp/xx.$$
30 | #		mv /Users/ronggong/Desktop/tmp/xx.$$ "$i"
31 | #	done
32 | #done
33 | #	cd ../..
34 | #
35 | #/Applications/Praat.app/Contents/MacOS/Praat "createtextgridDan.praat"
36 | /Applications/Praat.app/Contents/MacOS/praat "createtextgridLaosheng.praat"


--------------------------------------------------------------------------------
/kaldi_alignment/cmd.sh:
--------------------------------------------------------------------------------
1 | train_cmd="run.pl --mem 6G"  # command prefix for training steps (presumably consumed by run.sh; verify)
2 | decode_cmd="run.pl --mem 6G"  # command prefix for decoding steps (presumably consumed by run.sh; verify)
3 | 


--------------------------------------------------------------------------------
/kaldi_alignment/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=true
2 | --sample-frequency=44100
3 | 


--------------------------------------------------------------------------------
/kaldi_alignment/createtextgridDan.praat:
--------------------------------------------------------------------------------
  1 | # Created 3-27-15 Eleanor Chodroff 
  2 | # For every *.txt table in dir$: load the matching wav, build a TextGrid from the table rows, and save it as dir$/<basename>.TextGrid.
  3 | wavDir$ = "/Users/ronggong/Documents_using/MTG_document/Jingju_arias"
  4 | dir$ = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/splitAli/dan"
  5 | # List the tables to process.
  6 | Create Strings as file list... list_txt 'dir$'/*.txt
  7 | nFiles = Get number of strings
  8 | 
  9 | for i from 1 to nFiles
 10 | 	select Strings list_txt
 11 | 	filename$ = Get string... i
 12 | 	basename$ = filename$ - ".txt"
 13 | 	txtname$ = filename$ - ".txt"
 14 | 	# Names starting with "primary_school_recording" are read from wavDir$/primary_school_recording/wav_left/<artist>/<subfolder>/;
 15 | 	# all other names are read from wavDir$/jingju_a_cappella_singing_dataset/wav_left/<roletype>/.
 16 | 	if startsWith (basename$, "primary_school_recording")
 17 | 		# Keep the part of the name from the last "2017" onward; its leading token up to the first "_" is the artist folder.
 18 | 		rind_2017_start = rindex (basename$, "2017")
 19 | 
 20 | 		subfoldername_len = length (basename$) - rind_2017_start + 1
 21 | 		# pathname$ = left$ (basename$, rind - 2)
 22 | 		subfoldername$ = right$ (basename$, subfoldername_len)
 23 | 		artistname$ = left$ (subfoldername$,  index (subfoldername$, "_") - 1)
 24 | 		subfoldername2$ = right$ (subfoldername$, length (subfoldername$) - length (artistname$) - 1)
 25 | 		# The wav file name starts at the last "teacher" marker, or at the last "student" marker when "teacher" is absent.
 26 | 		rind_t = rindex (subfoldername2$, "teacher")
 27 | 		rind_s = rindex (subfoldername2$, "student")
 28 | 		if rind_t
 29 | 			rind = rind_t
 30 | 		else
 31 | 			rind = rind_s
 32 | 		endif
 33 | 		filename_len = length (subfoldername2$) - rind + 1
 34 | 		filename_noext$ = right$ (subfoldername2$, filename_len)
 35 | 		# Drop the file name and the one separator character before it to get the sub-folder.
 36 | 		subfoldername2$ = left$ (subfoldername2$, length (subfoldername2$) - filename_len - 1)
 37 | 
 38 | 		#writeInfoLine: basename$
 39 | 		#writeInfoLine: pathname$
 40 | 		#writeInfoLine: subfoldername$
 41 | 		#writeInfoLine: artistname$
 42 | 		#writeInfoLine: subfoldername2$
 43 | 		#writeInfoLine: filename_noext$
 44 | 
 45 | 		Read from file... 'wavDir$'/primary_school_recording/wav_left/'artistname$'/'subfoldername2$'/'filename_noext$'.wav
 46 | 	else
 47 | 		rind_dan = rindex (basename$, "danAll")
 48 | 		rind_laosheng = rindex (basename$, "laosheng")
 49 | 		if rind_dan
 50 | 			rind_roletype_start = rind_dan
 51 | 			rind_roletype_end = rind_dan + 6
 52 | 			roletype$ = "danAll"
 53 | 		else
 54 | 			rind_roletype_start = rind_laosheng
 55 | 			rind_roletype_end = rind_laosheng + 8
 56 | 			roletype$ = "laosheng"
 57 | 		endif
 58 | 		# The wav file name is whatever follows the role-type marker and the one separator character after it.
 59 | 		filename_len = length (basename$) - rind_roletype_end
 60 | 		#path_roletype_name = left$ (basename$, rind_roletype_start)
 61 | 		pathname$ = left$ (basename$,  rind_roletype_start - 2)
 62 | 		filename_noext$ = right$ (basename$, filename_len)
 63 | 
 64 | 		#writeInfoLine: rind_roletype_start
 65 | 		#writeInfoLine: rind_roletype_end
 66 | 		#writeInfoLine: basename$
 67 | 		#writeInfoLine: roletype$
 68 | 		#writeInfoLine: filename_noext$
 69 | 
 70 | 		Read from file... 'wavDir$'/jingju_a_cappella_singing_dataset/wav_left/'roletype$'/'filename_noext$'.wav
 71 | 	endif
 72 | 	# Build a TextGrid from the sound that was just read, passing "kaldiphone" as the tier argument.
 73 | 	dur = Get total duration
 74 | 	# NOTE(review): this dur value is never read; dur is reassigned from the table inside the row loop before its only use.
 75 | 	To TextGrid... "kaldiphone"
 76 | 
 77 | 	#pause 'txtname$'
 78 | 	# Load the table, sort it by its "start" column, then process one row at a time (start = column 10, duration = column 6, label = column 7; column 5 of the next row is read as startnextutt).
 79 | 	select Strings list_txt
 80 | 	Read Table from tab-separated file... 'dir$'/'txtname$'.txt
 81 | 	Rename... times
 82 | 	nRows = Get number of rows
 83 | 	Sort rows... start
 84 | 	for j from 1 to nRows
 85 | 		select Table times
 86 | 		startutt_col$ = Get column label... 5
 87 | 		start_col$ = Get column label... 10
 88 | 		dur_col$ = Get column label... 6
 89 | 		phone_col$ = Get column label... 7
 90 | 		if j < nRows
 91 | 			startnextutt = Get value... j+1 'startutt_col$'
 92 | 		else
 93 | 			startnextutt = 0
 94 | 		endif
 95 | 		start = Get value... j 'start_col$'
 96 | 		phone$ = Get value... j 'phone_col$'
 97 | 		dur = Get value... j 'dur_col$'
 98 | 		end = start + dur
 99 | 		select TextGrid 'filename_noext$'
100 | 		int = Get interval at time... 1 start+0.005
101 | 		if start > 0 & startnextutt = 0
102 | 			Insert boundary... 1 start
103 | 			Set interval text... 1 int+1 'phone$'
104 | 			Insert boundary... 1 end
105 | 		elsif start = 0
106 | 			Set interval text... 1 int 'phone$'
107 | 		elsif start > 0
108 | 			Insert boundary... 1 start
109 | 			Set interval text... 1 int+1 'phone$'
110 | 		endif
111 | 		#pause
112 | 	endfor
113 | 	#pause
114 | 	Write to text file... 'dir$'/'basename$'.TextGrid
115 | 	select Table times
116 | 	plus Sound 'filename_noext$'
117 | 	plus TextGrid 'filename_noext$'
118 | 	Remove
119 | endfor


--------------------------------------------------------------------------------
/kaldi_alignment/createtextgridLaosheng.praat:
--------------------------------------------------------------------------------
  1 | # Created 3-27-15 Eleanor Chodroff 
  2 | # For every *.txt table in dir$: load the matching wav, build a TextGrid from the table rows, and save it as dir$/<basename>.TextGrid.
  3 | wavDir$ = "/Users/ronggong/Documents_using/MTG_document/Jingju_arias"
  4 | dir$ = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/splitAli/laosheng"
  5 | # List the tables to process.
  6 | Create Strings as file list... list_txt 'dir$'/*.txt
  7 | nFiles = Get number of strings
  8 | 
  9 | for i from 1 to nFiles
 10 | 
 11 | 	select Strings list_txt
 12 | 	filename$ = Get string... i
 13 | 	basename$ = filename$ - ".txt"
 14 | 	txtname$ = filename$ - ".txt"
 15 | 	# Names starting with "primary_school_recording" are read from the primary_school_recording folder; all others from jingju_a_cappella_singing_dataset.
 16 | 	if startsWith (basename$, "primary_school_recording")
 17 | 		# Keep the part of the name from the last "2017" onward; its leading token up to the first "_" is the artist folder.
 18 | 		rind_2017_start = rindex (basename$, "2017")
 19 | 
 20 | 		subfoldername_len = length (basename$) - rind_2017_start + 1
 21 | 		# pathname$ = left$ (basename$, rind - 2)
 22 | 		subfoldername$ = right$ (basename$, subfoldername_len)
 23 | 		artistname$ = left$ (subfoldername$,  index (subfoldername$, "_") - 1)
 24 | 		subfoldername2$ = right$ (subfoldername$, length (subfoldername$) - length (artistname$) - 1)
 25 | 		# The wav file name starts at the last "teacher" marker, or at the last "student" marker when "teacher" is absent.
 26 | 		rind_t = rindex (subfoldername2$, "teacher")
 27 | 		rind_s = rindex (subfoldername2$, "student")
 28 | 		if rind_t
 29 | 			rind = rind_t
 30 | 		else
 31 | 			rind = rind_s
 32 | 		endif
 33 | 		filename_len = length (subfoldername2$) - rind + 1
 34 | 		filename_noext$ = right$ (subfoldername2$, filename_len)
 35 | 		# Drop the file name and the one separator character before it to get the sub-folder.
 36 | 		subfoldername2$ = left$ (subfoldername2$, length (subfoldername2$) - filename_len - 1)
 37 | 
 38 | 		#writeInfoLine: basename$
 39 | 		#writeInfoLine: pathname$
 40 | 		#writeInfoLine: subfoldername$
 41 | 		#writeInfoLine: artistname$
 42 | 		#writeInfoLine: subfoldername2$
 43 | 		#writeInfoLine: filename_noext$
 44 | 
 45 | 		Read from file... 'wavDir$'/primary_school_recording/wav_left/'artistname$'/'subfoldername2$'/'filename_noext$'.wav
 46 | 	else
 47 | 		rind_dan = rindex (basename$, "danAll")
 48 | 		rind_laosheng = rindex (basename$, "laosheng")
 49 | 		if rind_dan
 50 | 			rind_roletype_start = rind_dan
 51 | 			rind_roletype_end = rind_dan + 6
 52 | 			roletype$ = "danAll"
 53 | 		else
 54 | 			rind_roletype_start = rind_laosheng
 55 | 			rind_roletype_end = rind_laosheng + 8
 56 | 			roletype$ = "laosheng"
 57 | 		endif
 58 | 		# The wav file name is whatever follows the role-type marker and the one separator character after it.
 59 | 		filename_len = length (basename$) - rind_roletype_end
 60 | 		#path_roletype_name = left$ (basename$, rind_roletype_start)
 61 | 		pathname$ = left$ (basename$,  rind_roletype_start - 2)
 62 | 		filename_noext$ = right$ (basename$, filename_len)
 63 | 
 64 | 		#writeInfoLine: rind_roletype_start
 65 | 		#writeInfoLine: rind_roletype_end
 66 | 		#writeInfoLine: basename$
 67 | 		#writeInfoLine: roletype$
 68 | 		#writeInfoLine: filename_noext$
 69 | 
 70 | 		Read from file... 'wavDir$'/jingju_a_cappella_singing_dataset/wav_left/'roletype$'/'filename_noext$'.wav
 71 | 	endif
 72 | 	# NOTE(review): the dur value read next is never used; dur is reassigned from the table inside the row loop before its only use.
 73 | 	dur = Get total duration
 74 | 	To TextGrid... "kaldiphone"
 75 | 
 76 | 	#pause 'txtname$'
 77 | 	# Load the table, sort it by its "start" column, then process one row at a time (start = column 10, duration = column 6, label = column 7; column 5 of the next row is read as startnextutt).
 78 | 	select Strings list_txt
 79 | 	Read Table from tab-separated file... 'dir$'/'txtname$'.txt
 80 | 	Rename... times
 81 | 	nRows = Get number of rows
 82 | 	Sort rows... start
 83 | 	for j from 1 to nRows
 84 | 		select Table times
 85 | 		startutt_col$ = Get column label... 5
 86 | 		start_col$ = Get column label... 10
 87 | 		dur_col$ = Get column label... 6
 88 | 		phone_col$ = Get column label... 7
 89 | 		if j < nRows
 90 | 			startnextutt = Get value... j+1 'startutt_col$'
 91 | 		else
 92 | 			startnextutt = 0
 93 | 		endif
 94 | 		start = Get value... j 'start_col$'
 95 | 		phone$ = Get value... j 'phone_col$'
 96 | 		dur = Get value... j 'dur_col$'
 97 | 		end = start + dur
 98 | 		select TextGrid 'filename_noext$'
 99 | 		int = Get interval at time... 1 start+0.005
100 | 		if start > 0 & startnextutt = 0
101 | 			Insert boundary... 1 start
102 | 			Set interval text... 1 int+1 'phone$'
103 | 			Insert boundary... 1 end
104 | 		elsif start = 0
105 | 			Set interval text... 1 int 'phone$'
106 | 		elsif start > 0
107 | 			Insert boundary... 1 start
108 | 			Set interval text... 1 int+1 'phone$'
109 | 		endif
110 | 		#pause
111 | 	endfor
112 | 	#pause
113 | 	Write to text file... 'dir$'/'basename$'.TextGrid
114 | 	select Table times
115 | 	plus Sound 'filename_noext$'
116 | 	plus TextGrid 'filename_noext$'
117 | 	Remove
118 | endfor


--------------------------------------------------------------------------------
/kaldi_alignment/data/dev/reco2file_and_channel:
--------------------------------------------------------------------------------
 1 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher teacher 1
 2 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher teacher 1
 3 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01 student_01 1
 4 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02 student_02 1
 5 | Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm daeh-You_He_hou-He_hou_ma_dian-qm 1
 6 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm dafeh-Bi_yun_tian-Xi_xiang_ji01-qm 1
 7 | Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm dafeh-Mo_lai_you-Liu_yue_xue-qm 1
 8 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon 1
 9 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm danbz-Qing_chen_qi-Shi_yu_zhuo-qm 1
10 | Dan_danAll_xixiangji_luanchouduo xixiangji_luanchouduo 1
11 | Dan_danAll_yutangchun_yutangchun yutangchun_yutangchun 1
12 | Dan_danAll_zhuangyuanmei_daocishi zhuangyuanmei_daocishi 1
13 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher teacher 1
14 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher teacher 1
15 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher teacher 1
16 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm lseh-Wo_ben_shi-Qiong_lin_yan-qm 1
17 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm 1
18 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm lsxp-Quan_qian_sui-Gan_lu_si-qm 1
19 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm lsxp-Shi_ye_shuo-Ding_jun_shan-qm 1
20 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dev/spk2utt:
--------------------------------------------------------------------------------
1 | Dan Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_000 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_001 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_002 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_003 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_004 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_005 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_006 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_007 Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_008 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_000 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_001 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_002 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_003 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_004 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_005 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_006 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_007 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_008 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_009 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_010 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_011 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_012 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_013 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_014 Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_015 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_000 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_001 
Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_002 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_003 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_004 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_005 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_000 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_001 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_002 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_003 Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_004 Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm_000 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_000 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_001 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_002 Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_003 Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm_000 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_000 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_001 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_002 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_003 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_004 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_005 Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_006 Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_000 Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_001 Dan_danAll_xixiangji_luanchouduo_000 Dan_danAll_xixiangji_luanchouduo_001 Dan_danAll_xixiangji_luanchouduo_002 Dan_danAll_xixiangji_luanchouduo_003 Dan_danAll_yutangchun_yutangchun_000 Dan_danAll_yutangchun_yutangchun_001 Dan_danAll_yutangchun_yutangchun_002 Dan_danAll_yutangchun_yutangchun_003 Dan_danAll_yutangchun_yutangchun_004 Dan_danAll_yutangchun_yutangchun_005 Dan_danAll_yutangchun_yutangchun_006 Dan_danAll_yutangchun_yutangchun_007 
Dan_danAll_zhuangyuanmei_daocishi_000 Dan_danAll_zhuangyuanmei_daocishi_001 Dan_danAll_zhuangyuanmei_daocishi_002 Dan_danAll_zhuangyuanmei_daocishi_003 Dan_danAll_zhuangyuanmei_daocishi_004 Dan_danAll_zhuangyuanmei_daocishi_005 Dan_danAll_zhuangyuanmei_daocishi_006 Dan_danAll_zhuangyuanmei_daocishi_007 Dan_danAll_zhuangyuanmei_daocishi_008 Dan_danAll_zhuangyuanmei_daocishi_009
2 | Laosheng Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_000 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_001 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_002 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_003 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_004 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_005 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_006 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_007 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_008 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_009 Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_010 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_000 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_001 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_002 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_003 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_004 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_005 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_006 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_007 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_008 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_009 Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_010 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_000 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_001 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_002 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_003 
Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_004 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_005 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_006 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_007 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_008 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_009 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_010 Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_011 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_000 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_001 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_002 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_003 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_004 Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_005 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_000 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_001 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_002 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_003 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_004 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_005 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_006 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_007 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_008 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_009 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_010 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_011 Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_012 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_000 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_001 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_002 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_003 
Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_004 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_005 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_006 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_007 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_008 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_009 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_010 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_011 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_012 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_013 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_014 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_015 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_016 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_017 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_018 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_019 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_020 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_021 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_022 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_023 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_024 Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_025 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_000 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_001 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_002 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_003 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_004 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_005 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_006 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_007 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_008 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_009 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_010 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_011 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_012 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_013 
Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_014 Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_015
3 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dev/utt2spk:
--------------------------------------------------------------------------------
  1 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_000 Dan
  2 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_001 Dan
  3 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_002 Dan
  4 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_003 Dan
  5 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_004 Dan
  6 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_005 Dan
  7 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_006 Dan
  8 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_007 Dan
  9 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher_008 Dan
 10 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_000 Dan
 11 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_001 Dan
 12 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_002 Dan
 13 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_003 Dan
 14 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_004 Dan
 15 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_005 Dan
 16 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_006 Dan
 17 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_007 Dan
 18 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_008 Dan
 19 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_009 Dan
 20 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_010 Dan
 21 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_011 Dan
 22 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_012 Dan
 23 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_013 Dan
 24 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_014 Dan
 25 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher_015 Dan
 26 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_000 Dan
 27 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_001 Dan
 28 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_002 Dan
 29 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_003 Dan
 30 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_004 Dan
 31 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01_005 Dan
 32 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_000 Dan
 33 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_001 Dan
 34 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_002 Dan
 35 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_003 Dan
 36 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02_004 Dan
 37 | Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm_000 Dan
 38 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_000 Dan
 39 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_001 Dan
 40 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_002 Dan
 41 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm_003 Dan
 42 | Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm_000 Dan
 43 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_000 Dan
 44 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_001 Dan
 45 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_002 Dan
 46 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_003 Dan
 47 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_004 Dan
 48 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_005 Dan
 49 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon_006 Dan
 50 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_000 Dan
 51 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm_001 Dan
 52 | Dan_danAll_xixiangji_luanchouduo_000 Dan
 53 | Dan_danAll_xixiangji_luanchouduo_001 Dan
 54 | Dan_danAll_xixiangji_luanchouduo_002 Dan
 55 | Dan_danAll_xixiangji_luanchouduo_003 Dan
 56 | Dan_danAll_yutangchun_yutangchun_000 Dan
 57 | Dan_danAll_yutangchun_yutangchun_001 Dan
 58 | Dan_danAll_yutangchun_yutangchun_002 Dan
 59 | Dan_danAll_yutangchun_yutangchun_003 Dan
 60 | Dan_danAll_yutangchun_yutangchun_004 Dan
 61 | Dan_danAll_yutangchun_yutangchun_005 Dan
 62 | Dan_danAll_yutangchun_yutangchun_006 Dan
 63 | Dan_danAll_yutangchun_yutangchun_007 Dan
 64 | Dan_danAll_zhuangyuanmei_daocishi_000 Dan
 65 | Dan_danAll_zhuangyuanmei_daocishi_001 Dan
 66 | Dan_danAll_zhuangyuanmei_daocishi_002 Dan
 67 | Dan_danAll_zhuangyuanmei_daocishi_003 Dan
 68 | Dan_danAll_zhuangyuanmei_daocishi_004 Dan
 69 | Dan_danAll_zhuangyuanmei_daocishi_005 Dan
 70 | Dan_danAll_zhuangyuanmei_daocishi_006 Dan
 71 | Dan_danAll_zhuangyuanmei_daocishi_007 Dan
 72 | Dan_danAll_zhuangyuanmei_daocishi_008 Dan
 73 | Dan_danAll_zhuangyuanmei_daocishi_009 Dan
 74 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_000 Laosheng
 75 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_001 Laosheng
 76 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_002 Laosheng
 77 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_003 Laosheng
 78 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_004 Laosheng
 79 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_005 Laosheng
 80 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_006 Laosheng
 81 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_007 Laosheng
 82 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_008 Laosheng
 83 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_009 Laosheng
 84 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher_010 Laosheng
 85 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_000 Laosheng
 86 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_001 Laosheng
 87 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_002 Laosheng
 88 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_003 Laosheng
 89 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_004 Laosheng
 90 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_005 Laosheng
 91 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_006 Laosheng
 92 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_007 Laosheng
 93 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_008 Laosheng
 94 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_009 Laosheng
 95 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher_010 Laosheng
 96 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_000 Laosheng
 97 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_001 Laosheng
 98 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_002 Laosheng
 99 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_003 Laosheng
100 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_004 Laosheng
101 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_005 Laosheng
102 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_006 Laosheng
103 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_007 Laosheng
104 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_008 Laosheng
105 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_009 Laosheng
106 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_010 Laosheng
107 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher_011 Laosheng
108 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_000 Laosheng
109 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_001 Laosheng
110 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_002 Laosheng
111 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_003 Laosheng
112 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_004 Laosheng
113 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm_005 Laosheng
114 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_000 Laosheng
115 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_001 Laosheng
116 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_002 Laosheng
117 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_003 Laosheng
118 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_004 Laosheng
119 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_005 Laosheng
120 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_006 Laosheng
121 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_007 Laosheng
122 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_008 Laosheng
123 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_009 Laosheng
124 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_010 Laosheng
125 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_011 Laosheng
126 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm_012 Laosheng
127 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_000 Laosheng
128 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_001 Laosheng
129 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_002 Laosheng
130 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_003 Laosheng
131 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_004 Laosheng
132 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_005 Laosheng
133 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_006 Laosheng
134 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_007 Laosheng
135 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_008 Laosheng
136 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_009 Laosheng
137 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_010 Laosheng
138 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_011 Laosheng
139 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_012 Laosheng
140 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_013 Laosheng
141 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_014 Laosheng
142 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_015 Laosheng
143 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_016 Laosheng
144 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_017 Laosheng
145 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_018 Laosheng
146 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_019 Laosheng
147 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_020 Laosheng
148 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_021 Laosheng
149 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_022 Laosheng
150 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_023 Laosheng
151 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_024 Laosheng
152 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm_025 Laosheng
153 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_000 Laosheng
154 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_001 Laosheng
155 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_002 Laosheng
156 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_003 Laosheng
157 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_004 Laosheng
158 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_005 Laosheng
159 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_006 Laosheng
160 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_007 Laosheng
161 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_008 Laosheng
162 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_009 Laosheng
163 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_010 Laosheng
164 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_011 Laosheng
165 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_012 Laosheng
166 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_013 Laosheng
167 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_014 Laosheng
168 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm_015 Laosheng
169 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dev/wav.scp:
--------------------------------------------------------------------------------
 1 | Dan_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Wo_jia_di-Hong_deng_ji-dxjky/teacher.wav
 2 | Dan_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp_Qing_zao_qi_lai-Mai_shui-dxjky/teacher.wav
 3 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo/student_01.wav
 4 | Dan_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo/student_02.wav
 5 | Dan_danAll_daeh-You_He_hou-He_hou_ma_dian-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/daeh-You_He_hou-He_hou_ma_dian-qm.wav
 6 | Dan_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/dafeh-Bi_yun_tian-Xi_xiang_ji01-qm.wav
 7 | Dan_danAll_dafeh-Mo_lai_you-Liu_yue_xue-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/dafeh-Mo_lai_you-Liu_yue_xue-qm.wav
 8 | Dan_danAll_dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/dagbz-Feng_xiao_xiao-Yang_men_nv_jiang-lon.wav
 9 | Dan_danAll_danbz-Qing_chen_qi-Shi_yu_zhuo-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/danbz-Qing_chen_qi-Shi_yu_zhuo-qm.wav
10 | Dan_danAll_xixiangji_luanchouduo /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/xixiangji_luanchouduo.wav
11 | Dan_danAll_yutangchun_yutangchun /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/yutangchun_yutangchun.wav
12 | Dan_danAll_zhuangyuanmei_daocishi /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/danAll/zhuangyuanmei_daocishi.wav
13 | Laosheng_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/teacher.wav
14 | Laosheng_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lseh-Wo_men_shi-Zhi_qu-sizhu/teacher.wav
15 | Laosheng_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu/teacher.wav
16 | Laosheng_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/laosheng/lseh-Wo_ben_shi-Qiong_lin_yan-qm.wav
17 | Laosheng_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/laosheng/lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm.wav
18 | Laosheng_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/laosheng/lsxp-Quan_qian_sui-Gan_lu_si-qm.wav
19 | Laosheng_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm /Users/ronggong/Documents_using/MTG_document/Jingju_arias/jingju_a_cappella_singing_dataset/wav_left/laosheng/lsxp-Shi_ye_shuo-Ding_jun_shan-qm.wav
20 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict/nonsilence_phones.txt:
--------------------------------------------------------------------------------
 1 | ?
 2 | @
 3 | 1
 4 | 7
 5 | 7N
 6 | 9
 7 | a
 8 | aI^
 9 | an
10 | AN
11 | AU^
12 | c
13 | E
14 | eI^
15 | En
16 | f
17 | H
18 | i
19 | in
20 | iN
21 | j
22 | k
23 | l
24 | m
25 | M
26 | n
27 | @n
28 | N
29 | o
30 | O
31 | oU^
32 | r\'
33 | u
34 | UN
35 | w
36 | x
37 | y
38 | yn
39 | sil_phone
40 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict/optional_silence.txt:
--------------------------------------------------------------------------------
1 | sil
2 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict/silence_phones.txt:
--------------------------------------------------------------------------------
1 | sil
2 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict_test/nonsilence_phones.txt:
--------------------------------------------------------------------------------
 1 | ?
 2 | @
 3 | 1
 4 | 7
 5 | 7N
 6 | 9
 7 | a
 8 | aI^
 9 | an
10 | AN
11 | AU^
12 | c
13 | E
14 | eI^
15 | En
16 | f
17 | H
18 | i
19 | in
20 | iN
21 | j
22 | k
23 | l
24 | m
25 | M
26 | n
27 | @n
28 | N
29 | o
30 | O
31 | oU^
32 | r\'
33 | u
34 | UN
35 | w
36 | x
37 | y
38 | yn
39 | sil_phone
40 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict_test/optional_silence.txt:
--------------------------------------------------------------------------------
1 | sil
2 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/dict_test/silence_phones.txt:
--------------------------------------------------------------------------------
1 | sil
2 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/test/reco2file_and_channel:
--------------------------------------------------------------------------------
 1 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student01 student01 1
 2 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student02 student02 1
 3 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student03 student03 1
 4 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student04 student04 1
 5 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student01 student01 1
 6 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student02 student02 1
 7 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student03 student03 1
 8 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student04 student04 1
 9 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student05 student05 1
10 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student06 student06 1
11 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student07 student07 1
12 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_01 student_01 1
13 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_02 student_02 1
14 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_03 student_03 1
15 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_04 student_04 1
16 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_05 student_05 1
17 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_02 student_02 1
18 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_01 student_01 1
19 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_02 student_02 1
20 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_03 student_03 1
21 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_04 student_04 1
22 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_05 student_05 1
23 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_06 student_06 1
24 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_dxjky student_01_dxjky 1
25 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_sizhu student_01_sizhu 1
26 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_02_dxjky student_02_dxjky 1
27 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_03_dxjky student_03_dxjky 1
28 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_04_dxjky student_04_dxjky 1
29 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_05_dxjky student_05_dxjky 1
30 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_06_mentougou student_06_mentougou 1
31 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_01 student_01 1
32 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_02 student_02 1
33 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_04_mentougou student_04_mentougou 1
34 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_01 student_01 1
35 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_02 student_02 1
36 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_03 student_03 1
37 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/test/wav.scp:
--------------------------------------------------------------------------------
 1 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student01.wav
 2 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student02.wav
 3 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student03.wav
 4 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_student04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Fei_shi_wo-Hua_tian_cuo-dxjky/student04.wav
 5 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student01.wav
 6 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student02.wav
 7 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student03.wav
 8 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student04.wav
 9 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student05.wav
10 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student06 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student06.wav
11 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky_student07 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171211SongRuoXuan/daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky/student07.wav
12 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_01.wav
13 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_02.wav
14 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_03.wav
15 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_04.wav
16 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_student_05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo/student_05.wav
17 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171214SongRuoXuan/daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo/student_02.wav
18 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_01.wav
19 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_02.wav
20 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_03.wav
21 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_04 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_04.wav
22 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_05 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_05.wav
23 | Dan_primary_school_recording_20171215SongRuoXuan_daxp-Jiao_zhang_sheng-Xi_shi-qianmen_student_06 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171215SongRuoXuan/daxp-Jiao_zhang_sheng-Xi_shi-qianmen/student_06.wav
24 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_01_dxjky.wav
25 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_01_sizhu /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_01_sizhu.wav
26 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_02_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_02_dxjky.wav
27 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_03_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_03_dxjky.wav
28 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_04_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_04_dxjky.wav
29 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_05_dxjky /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_05_dxjky.wav
30 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_student_06_mentougou /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/2017121718SongRuoXuan/lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu/student_06_mentougou.wav
31 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_01.wav
32 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_02.wav
33 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_student_04_mentougou /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Jiang_shen_er-San_jia_dian-sizhu/student_04_mentougou.wav
34 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_01 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_01.wav
35 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_02 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_02.wav
36 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_student_03 /Users/ronggong/Documents_using/MTG_document/Jingju_arias/primary_school_recording/wav_left/20171217TianHao/lsxp-Wei_guo_jia-Hong_yang_dong-sizhu/student_03.wav
37 | 


--------------------------------------------------------------------------------
/kaldi_alignment/data/train/reco2file_and_channel:
--------------------------------------------------------------------------------
 1 | Dan_jingju_a_cappella_singing_dataset_danAll_daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon 1
 2 | Dan_jingju_a_cappella_singing_dataset_danAll_dafeh-Bi_yun_tian-Xi_xiang_ji01-qm dafeh-Bi_yun_tian-Xi_xiang_ji01-qm 1
 3 | Dan_jingju_a_cappella_singing_dataset_danAll_danbz-Bei_jiu_chan-Chun_gui_men01-qm danbz-Bei_jiu_chan-Chun_gui_men01-qm 1
 4 | Dan_jingju_a_cappella_singing_dataset_danAll_danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm 1
 5 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Du_shou_kong-Wang_jiang_ting-upf daspd-Du_shou_kong-Wang_jiang_ting-upf 1
 6 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon 1
 7 | Dan_jingju_a_cappella_singing_dataset_danAll_daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm 1
 8 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Chun_qiu_ting-Suo_lin_nang01-qm daxp-Chun_qiu_ting-Suo_lin_nang01-qm 1
 9 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Guan_Shi_yin-Tian_nv_san_hua-lon daxp-Guan_Shi_yin-Tian_nv_san_hua-lon 1
10 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Jiao_Zhang_sheng-Hong_niang01-qm daxp-Jiao_Zhang_sheng-Hong_niang01-qm 1
11 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Jiao_Zhang_sheng-Hong_niang04-qm daxp-Jiao_Zhang_sheng-Hong_niang04-qm 1
12 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf 1
13 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm 1
14 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm 1
15 | Dan_jingju_a_cappella_singing_dataset_danAll_daxp-Zhe_cai_shi-Suo_lin_nang01-qm daxp-Zhe_cai_shi-Suo_lin_nang01-qm 1
16 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Fei_shi_wo-Hua_tian_cuo-dxjky_teacher teacher 1
17 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student01 student01 1
18 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student02 student02 1
19 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student03 student03 1
20 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student04 student04 1
21 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student05 student05 1
22 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_student06 student06 1
23 | Dan_primary_school_recording_20171211SongRuoXuan_daxp-Wo_jia_di-Hong_deng_ji-dxjky_teacher teacher 1
24 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student01 student01 1
25 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student02 student02 1
26 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student02_first_half student02_first_half 1
27 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student03 student03 1
28 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student04 student04 1
29 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student05 student05 1
30 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_student06 student06 1
31 | Dan_primary_school_recording_20171211SongRuoXuan_daxp_Qing_zao_qi_lai-Mai_shui-dxjky_teacher teacher 1
32 | Dan_primary_school_recording_20171214SongRuoXuan_daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo_teacher teacher 1
33 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_01 student_01 1
34 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_02 student_02 1
35 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_student_03 student_03 1
36 | Dan_primary_school_recording_20171214SongRuoXuan_danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo_teacher teacher 1
37 | Dan_primary_school_recording_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_student_01 student_01 1
38 | Dan_primary_school_recording_20171214SongRuoXuan_daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo_teacher teacher 1
39 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_01 student_01 1
40 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_student_03 student_03 1
41 | Dan_primary_school_recording_20171214SongRuoXuan_daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo_teacher teacher 1
42 | Laosheng_jingju_a_cappella_singing_dataset_extended_nacta2017_20170327LiaoJiaNi_lseh-Niang_zi_bu-Sou_gu_jiu-nacta lseh-Niang_zi_bu-Sou_gu_jiu-nacta 1
43 | Laosheng_jingju_a_cappella_singing_dataset_extended_nacta2017_20170327LiaoJiaNi_lsxp-Yi_ma_li-Wu_jia_po-nacta lsxp-Yi_ma_li-Wu_jia_po-nacta 1
44 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Tan_Yang_jia-Hong_yang_dong-qm lseh-Tan_Yang_jia-Hong_yang_dong-qm 1
45 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wei_guo_jia-Hong_yang_dong01-lon lseh-Wei_guo_jia-Hong_yang_dong01-lon 1
46 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wei_guo_jia-Hong_yang_dong02-qm lseh-Wei_guo_jia-Hong_yang_dong02-qm 1
47 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Wo_ben_shi-Qiong_lin_yan-qm lseh-Wo_ben_shi-Qiong_lin_yan-qm 1
48 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Yi_lun_ming-Wen_zhao_guan-qm lseh-Yi_lun_ming-Wen_zhao_guan-qm 1
49 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lseh-Zi_na_ri-Hong_yang_dong-qm lseh-Zi_na_ri-Hong_yang_dong-qm 1
50 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Guo_liao_yi-Wen_zhao_guan02-qm lsxp-Guo_liao_yi-Wen_zhao_guan02-qm 1
51 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Huai_nan_wang-Huai_he_ying01-lon lsxp-Huai_nan_wang-Huai_he_ying01-lon 1
52 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Huai_nan_wang-Huai_he_ying02-qm lsxp-Huai_nan_wang-Huai_he_ying02-qm 1
53 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian01-1-upf lsxp-Jiang_shen_er-San_jia_dian01-1-upf 1
54 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian01-2-upf lsxp-Jiang_shen_er-San_jia_dian01-2-upf 1
55 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Jiang_shen_er-San_jia_dian02-qm lsxp-Jiang_shen_er-San_jia_dian02-qm 1
56 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm 1
57 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Quan_qian_sui-Gan_lu_si-qm lsxp-Quan_qian_sui-Gan_lu_si-qm 1
58 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Shi_ye_shuo-Ding_jun_shan-qm lsxp-Shi_ye_shuo-Ding_jun_shan-qm 1
59 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_ben_shi-Kong_cheng_ji-qm lsxp-Wo_ben_shi-Kong_cheng_ji-qm 1
60 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf 1
61 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm 1
62 | Laosheng_jingju_a_cappella_singing_dataset_laosheng_lsxp-Xi_ri_you-Zhu_lian_zhai-qm lsxp-Xi_ri_you-Zhu_lian_zhai-qm 1
63 | Laosheng_primary_school_recording_2017121718SongRuoXuan_lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu_teacher teacher 1
64 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_student_01 student_01 1
65 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_student_02 student_02 1
66 | Laosheng_primary_school_recording_20171217TianHao_lseh-Wo_men_shi-Zhi_qu-sizhu_teacher teacher 1
67 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Jiang_shen_er-San_jia_dian-sizhu_teacher teacher 1
68 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_student_01 student_01 1
69 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_student_02 student_02 1
70 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu_teacher teacher 1
71 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou_student_01_sizhu student_01_sizhu 1
72 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou_teacher teacher 1
73 | Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_teacher teacher 1
74 | 


--------------------------------------------------------------------------------
/kaldi_alignment/header.txt:
--------------------------------------------------------------------------------
1 | file_utt	file	id	ali 	startinutt	dur	phone	start_utt	end_utt	start	end
2 | 


--------------------------------------------------------------------------------
/kaldi_alignment/id2phone.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | #  id2phone.R
 4 | #
 5 | #  Created by Eleanor Chodroff on 3/24/15.
 6 | #
 7 | #  Joins the merged CTM alignment with the phone table (phone id -> phone
 8 | #  symbol) and with the segments file (start/end of every utterance), adds
 9 | #  start/end times shifted by the utterance start, and writes the result as
10 | #  a tab-separated table to exp/mono_ali/final_ali.txt.
11 | 
12 | phones <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/lang/phones.txt", quote="\"")
13 | segments <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/train/segments", quote="\"")
14 | ctm <- read.table("/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_ali/merged_alignment.txt", quote="\"")
15 | 
16 | names(ctm) <- c("file_utt","utt","start","dur","id")
17 | # file = utterance id with its trailing "_<digits>" suffix removed
18 | ctm$file <- gsub("_[0-9]*$","",ctm$file_utt)
19 | names(phones) <- c("phone","id")
20 | names(segments) <- c("file_utt","file","start_utt","end_utt")
21 | 
22 | ctm2 <- merge(ctm, phones, by="id")
23 | ctm3 <- merge(ctm2, segments, by=c("file_utt","file"))
24 | # start and dur are relative to the utterance: add the utterance offset
25 | # exactly once.  start_real already contains start_utt, so the end is simply
26 | # start_real + dur (adding start_utt again would shift every end time).
27 | ctm3$start_real <- ctm3$start + ctm3$start_utt
28 | ctm3$end_real <- ctm3$start_real + ctm3$dur
29 | 
30 | write.table(ctm3, "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_ali/final_ali.txt", row.names=F, quote=F, sep="\t")


--------------------------------------------------------------------------------
/kaldi_alignment/path.sh:
--------------------------------------------------------------------------------
1 | export KALDI_ROOT=/Users/ronggong/Documents_using/github/kaldi  # environment setup sourced by run.sh; machine-specific location of the Kaldi checkout
2 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh  # source the tools environment only when the file exists
3 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH  # utils/, the OpenFst binaries and the current directory take precedence on PATH
4 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1  # common_path.sh is mandatory: report on stderr and exit when it is missing
5 | . $KALDI_ROOT/tools/config/common_path.sh
6 | export LC_ALL=C  # force the C locale for everything run afterwards
7 | 


--------------------------------------------------------------------------------
/kaldi_alignment/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Monophone alignment recipe.  Steps, in order:
 4 | #   1. prepare the train/test data directories and extract MFCC + CMVN,
 5 | #   2. rebuild the lang directories for the train and test dictionaries,
 6 | #   3. train a monophone model on the train set and align the train set,
 7 | #   4. align the test set with the same model and print its pronunciations.
 8 | # All paths are relative to the directory the script is started from.
 9 | 
10 | exec 5> debug_output.txt
11 | BASH_XTRACEFD="5"
12 | 
13 | . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
14 |            ## This relates to the queue.
15 | . ./path.sh
16 | 
17 | H=`pwd`  #exp home
18 | n=2      #parallel jobs
19 | 
20 | 
21 | for x in train test; do
22 |     perl local/prepare_stm.pl data/$x || exit 1;
23 |     utils/fix_data_dir.sh data/$x || exit 1;
24 | done
25 | 
26 | #produce MFCC features
27 | rm -rf data/mfcc && mkdir -p data/mfcc &&  cp -R data/{train,test} data/mfcc || exit 1;
28 | for x in train test; do
29 |     #make  mfcc
30 |     steps/make_mfcc.sh --nj $n --cmd "$train_cmd" data/mfcc/$x exp/make_mfcc/$x mfcc/$x || exit 1;
31 |     #compute cmvn
32 |     steps/compute_cmvn_stats.sh data/mfcc/$x exp/mfcc_cmvn/$x mfcc/$x || exit 1;
33 | done
34 | 
35 | rm -rf data/local/ data/lang && rm -f data/dict/lexiconp.txt || exit 1;
36 | 
37 | # remove the language files for test data
38 | rm -rf data/local_test/ data/lang_test && rm -f data/dict_test/lexiconp.txt || exit 1;
39 | 
40 | #lang
41 | # prepare language stuffs for train data
42 | utils/prepare_lang.sh --sil-prob 0.0 --position_dependent_phones false data/dict "<SPOKEN_NOISE>" data/local/ data/lang || exit 1;
43 | 
44 | # prepare language stuffs for test data
45 | utils/prepare_lang.sh --sil-prob 0.0 --position_dependent_phones false data/dict_test "<SPOKEN_NOISE>" data/local_test/ data/lang_test || exit 1;
46 | 
47 | # monophone
48 | steps/train_mono.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/train data/lang exp/mono || exit 1;
49 | 
50 | #monophone_ali train
51 | steps/align_si.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/train data/lang exp/mono exp/mono_ali || exit 1;
52 | 
53 | # word level alignment
54 | steps/get_train_ctm.sh data/mfcc/train data/lang exp/mono_ali || exit 1;
55 | 
56 | # Unpack the per-job CTM files.  The loop has its own variable so that $n
57 | # (the job count passed to --nj further down) is never overwritten.
58 | for i in `seq $n`; do gunzip -k -f exp/mono_ali/ctm.$i.gz; done || exit 1;
59 | 
60 | . ./ali2Phones.sh
61 | 
62 | #monophone_ali test
63 | steps/align_si.sh --boost-silence 1.0 --nj $n --cmd "$train_cmd" data/mfcc/test data/lang_test exp/mono exp/mono_test_ali || exit 1;
64 | 
65 | # word level alignment
66 | steps/get_train_ctm.sh data/mfcc/test data/lang_test exp/mono_test_ali || exit 1;
67 | 
68 | for i in `seq $n`; do gunzip -k -f exp/mono_test_ali/ctm.$i.gz; done || exit 1;
69 | 
70 | steps/get_prons.sh data/mfcc/test data/lang_test exp/mono_test_ali
71 | 
72 | gunzip -c exp/mono_test_ali/prons.*.gz | utils/sym2int.pl -f 4 data/lang_test/words.txt | utils/sym2int.pl -f 5- data/lang_test/phones.txt
73 | 


--------------------------------------------------------------------------------
/kaldi_alignment/splitAlignments.py:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #  splitAlignments.py
 4 | #  
 5 | #
 6 | #  Created by Eleanor Chodroff on 3/25/15.
 7 | #
 8 | #
 9 | #
10 | import sys
11 | import csv
12 | import os
13 | 
14 | results=[]
15 | 
16 | # name = name of first text file in final_ali.txt
17 | # name_fin = name of final text file in final_ali.txt
18 | 
19 | name = "Dan_jingju_a_cappella_singing_dataset_danAll_daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon"
20 | name_fin = "Laosheng_primary_school_recording_20171217TianHao_lsxp-Wei_guo_jia-Hong_yang_dong-sizhu_teacher"
21 | 
22 | try:
23 |     with open("./exp/mono_ali/final_ali.txt") as f:
24 |         next(f) #skip header
25 |         for line in f:
26 |             columns=line.split("\t")
27 |             name_prev = name
28 |             dataset = name_prev.split('_')[0]
29 |             name = columns[1]
30 |             if (name_prev != name):
31 |                 try:
32 |                     path_roletype = os.path.join('./splitAli',dataset.lower())
33 |                     if not os.path.exists(path_roletype):
34 |                         os.makedirs(path_roletype)
35 |                     with open(os.path.join('./splitAli',dataset.lower(),name_prev[len(dataset)+1:]+".txt"),'w') as fwrite:
36 |                         writer = csv.writer(fwrite)
37 |                         fwrite.write("\n".join(results))
38 |                         fwrite.close()
39 |                 #print name
40 |                 except Exception as e:
41 |                     print("Failed to write file", e)
42 |                     sys.exit(2)
43 |                 del results[:]
44 |                 results.append(line[0:-1])
45 |             else:
46 |                 results.append(line[0:-1])
47 | except Exception as e:
48 |     print("Failed to read file", e)
49 |     sys.exit(1)
50 | # this prints out the last textfile (nothing following it to compare with)
51 | try:
52 |     with open(os.path.join('./splitAli', dataset.lower(), name_prev[len(dataset)+1:]+".txt"),'w') as fwrite:
53 |         writer = csv.writer(fwrite)
54 |         fwrite.write("\n".join(results))
55 |         fwrite.close()
56 |                 #print name
57 | except Exception as e:
58 |     print("Failed to write file", e)
59 |     sys.exit(2)


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/csv_prepossessing.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | 
 4 | def open_csv_recordings(filename):
 5 |     recordings = []
 6 |     with open(filename) as csvfile:
 7 |         readCSV = csv.reader(csvfile, delimiter=',')
 8 |         for row in readCSV:
 9 |             recordings.append(row)
10 |     return recordings
11 | 
12 | 
13 | def write_csv_two_columns_list(two_columns_list, filename):
14 |     with open(filename, 'wb') as csvfile:
15 |         two_columns_writer = csv.writer(csvfile, delimiter=',')
16 |         for l in two_columns_list:
17 |             two_columns_writer.writerow(l)
18 | 


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/filePath.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from os.path import join
 3 | from kaldi_alignment.srcPy.textgridParser import syllableTextgridExtraction
 4 | from kaldi_alignment.srcPy.csv_prepossessing import open_csv_recordings
 5 | 
 6 | path_root = '/Users/ronggong/Documents_using/MTG_document/Jingju_arias/'
 7 | 
 8 | path_nacta = 'jingju_a_cappella_singing_dataset'
 9 | path_nacta2017 = 'jingju_a_cappella_singing_dataset_extended_nacta2017'
10 | path_primary = 'primary_school_recording'
11 | 
12 | dataset_laosheng = 'qmLonUpf/laosheng'
13 | dataset_ss = 'sourceSeparation'
14 | 
15 | path_data_train = '../data/train'
16 | path_data_dev = '../data/dev'
17 | path_data_test = '../data/test'
18 | path_data_LM = '../data/LM'
19 | 
20 | path_lang = '../data/dict'
21 | 
22 | recordings_train = open_csv_recordings("mispronunciation_filelist_train.csv")
23 | recordings_test = open_csv_recordings("mispronunciation_filelist_test.csv")
24 | 
25 | 
26 | def getRecordings(wav_path):
27 |     recordings = []
28 |     for root, subFolders, files in os.walk(wav_path):
29 |         for f in files:
30 |             file_prefix, file_extension = os.path.splitext(f)
31 |             if file_prefix != '.DS_Store':
32 |                 recordings.append(file_prefix)
33 | 
34 |     return recordings
35 | 
36 | 
37 | def parse_recordings(rec):
38 |     if rec[0] == "part1":
39 |         data_path = path_nacta
40 |         sub_folder = rec[2]
41 |         textgrid_folder = "textgrid"
42 |         wav_folder = "wav_left"
43 |         syllable_tier = "dian"
44 |         if rec[3][:2] == 'da':
45 |             roletype = 'Dan'
46 |         elif rec[3][:2] == 'ls':
47 |             roletype = 'Laosheng'
48 |         else:
49 |             raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec))
50 |     elif rec[0] == "part2":
51 |         data_path = path_nacta2017
52 |         sub_folder = rec[2]
53 |         textgrid_folder = "textgridDetails"
54 |         wav_folder = "wav"
55 |         syllable_tier = "dianSilence"
56 |         if rec[3][:2] == 'da':
57 |             roletype = 'Dan'
58 |         elif rec[3][:2] == 'ls':
59 |             roletype = 'Laosheng'
60 |         else:
61 |             raise ValueError("Not exist a role-type {} for file {}".format(rec[3][:2], rec))
62 |     else:
63 |         data_path = path_primary
64 |         sub_folder = rec[1] + "/" + rec[2]
65 |         textgrid_folder = "textgrid"
66 |         wav_folder = "wav_left"
67 |         syllable_tier = "dianSilence"
68 |         if rec[2][:2] == 'da':
69 |             roletype = 'Dan'
70 |         elif rec[2][:2] == 'ls':
71 |             roletype = 'Laosheng'
72 |         else:
73 |             raise ValueError("Not exist a role-type {} for file {}".format(rec[2][:2], rec))
74 | 
75 |     filename = rec[3]
76 |     line_tier = "line"
77 |     longsyllable_tier = "longsyllable"
78 |     phoneme_tier = "details"
79 |     special_tier = "special"
80 |     special_class_tier = "specialClass"
81 | 
82 |     return data_path, sub_folder, textgrid_folder, \
83 |            wav_folder, filename, line_tier, \
84 |            longsyllable_tier, syllable_tier, phoneme_tier, \
85 |            special_tier, special_class_tier, roletype
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     for fn in getRecordings(os.path.join(path_root, path_nacta, 'textgrid', 'laosheng')):
90 |         print('[\'laosheng\', \''+fn+'\'],')


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/mispron_eval.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def parse_text_to_dict(filename):
 5 |     dict_text = {}
 6 |     with open(filename) as file:
 7 |         for row in file.readlines():
 8 |             row = row.replace('\n', '')
 9 |             key = row.split(' ')[0]
10 |             val = row.split(' ')[1:]
11 |             dict_text[key] = val
12 |     return dict_text
13 | 
14 | 
15 | if __name__ == '__main__':
16 |     path_test_ali = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_test_ali/"
17 |     path_test = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/test"
18 | 
19 |     filename_groundtruth_teacher = os.path.join(path_test, 'text_teacher')
20 |     filename_groundtruth_student = os.path.join(path_test, 'text_student')
21 |     filename_decoded_student = os.path.join(path_test_ali, 'text_decoded')
22 | 
23 |     dict_groundtruth_teacher = parse_text_to_dict(filename_groundtruth_teacher)
24 |     dict_groundtruth_student = parse_text_to_dict(filename_groundtruth_student)
25 |     dict_decoded_student = parse_text_to_dict(filename_decoded_student)
26 | 
27 |     special_correct_stu = 0
28 |     special_mis_stu = 0
29 |     jianzi_correct_stu = 0
30 |     jianzi_mis_stu = 0
31 | 
32 |     special_correct_tea, special_mis_tea, jianzi_correct_tea, jianzi_mis_tea = 0, 0, 0, 0
33 |     for key in dict_decoded_student:
34 |         val_decoded_stu = dict_decoded_student[key]
35 |         val_gt_tea = dict_groundtruth_teacher[key]
36 |         val_gt_tea_syl = [val for ii, val in enumerate(val_gt_tea) if ii % 2.0 == 0]
37 |         val_gt_tea_class = [val for ii, val in enumerate(val_gt_tea) if ii % 2.0 != 0]
38 |         val_gt_stu = dict_groundtruth_student[key]
39 | 
40 |         assert len(val_gt_tea_syl) == len(val_gt_stu) == len(val_decoded_stu)
41 | 
42 |         for ii, syl_class in enumerate(val_gt_tea_class):
43 |             if syl_class == '1':
44 |                 if val_gt_stu[ii] == val_decoded_stu[ii]:
45 |                     special_correct_stu += 1
46 |                 else:
47 |                     special_mis_stu += 1
48 |             elif syl_class == '2':
49 |                 if val_gt_stu[ii] == val_decoded_stu[ii]:
50 |                     jianzi_correct_stu += 1
51 |                 else:
52 |                     jianzi_mis_stu += 1
53 | 
54 |         for ii, syl_class in enumerate(val_gt_tea_class):
55 |             if syl_class == '1':
56 |                 if val_gt_tea_syl[ii] == val_gt_stu[ii]:
57 |                     if val_gt_tea_syl[ii] == val_decoded_stu[ii]:
58 |                         special_correct_tea += 1
59 |                     else:
60 |                         special_mis_tea += 1
61 |                 else:
62 |                     if val_gt_tea_syl[ii] == val_decoded_stu[ii]:
63 |                         special_mis_tea += 1
64 |                     else:
65 |                         special_correct_tea += 1
66 |             elif syl_class == '2':
67 |                 if val_gt_tea_syl[ii] == val_gt_stu[ii]:
68 |                     if val_gt_tea_syl[ii] == val_decoded_stu[ii]:
69 |                         jianzi_correct_tea += 1
70 |                     else:
71 |                         jianzi_mis_tea += 1
72 |                 else:
73 |                     if val_gt_tea_syl[ii] == val_decoded_stu[ii]:
74 |                         jianzi_mis_tea += 1
75 |                     else:
76 |                         jianzi_correct_tea += 1
77 | 
78 |     print(special_correct_stu, special_mis_stu, jianzi_correct_stu, jianzi_mis_stu)
79 |     print(special_correct_tea, special_mis_tea, jianzi_correct_tea, jianzi_mis_tea)
80 | 
81 | 


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/mispronunciation_filelist_test.csv:
--------------------------------------------------------------------------------
 1 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student01,,
 2 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student02,,
 3 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student03,,
 4 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student04,,
 5 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student01,,
 6 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student02,,
 7 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student03,,
 8 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student04,,
 9 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student05,,
10 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student06,,
11 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student07,,
12 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_01,,
13 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_02,,
14 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_03,,
15 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_04,,
16 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_05,,
17 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_02,,
18 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_01,,
19 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_02,,
20 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_03,,
21 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_04,,
22 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_05,,
23 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_06,,
24 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_01,,
25 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_02,,
26 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_04_mentougou,,
27 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_01,,
28 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_02,,
29 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_03,,
30 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_dxjky,,
31 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_sizhu,,
32 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_02_dxjky,,
33 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_03_dxjky,,
34 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_04_dxjky,,
35 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_05_dxjky,,
36 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_06_mentougou,,


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/mispronunciation_filelist_train.csv:
--------------------------------------------------------------------------------
 1 | part1,,danAll,dafeh-Bi_yun_tian-Xi_xiang_ji01-qm,,
 2 | part1,,danAll,danbz-Bei_jiu_chan-Chun_gui_men01-qm,,
 3 | part1,,danAll,danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm,,
 4 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm,,
 5 | part1,,danAll,daxp-Chun_qiu_ting-Suo_lin_nang01-qm,,
 6 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang01-qm,,
 7 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang04-qm,,
 8 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm,,
 9 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm,,
10 | part1,,danAll,daxp-Zhe_cai_shi-Suo_lin_nang01-qm,,
11 | part1,,laosheng,lseh-Tan_Yang_jia-Hong_yang_dong-qm,,
12 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong02-qm,,
13 | part1,,laosheng,lseh-Wo_ben_shi-Qiong_lin_yan-qm,,
14 | part1,,laosheng,lseh-Yi_lun_ming-Wen_zhao_guan-qm,,
15 | part1,,laosheng,lseh-Zi_na_ri-Hong_yang_dong-qm,,
16 | part1,,laosheng,lsxp-Guo_liao_yi-Wen_zhao_guan02-qm,,
17 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying02-qm,,
18 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian02-qm,,
19 | part1,,laosheng,lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm,,
20 | part1,,laosheng,lsxp-Quan_qian_sui-Gan_lu_si-qm,,
21 | part1,,laosheng,lsxp-Shi_ye_shuo-Ding_jun_shan-qm,,
22 | part1,,laosheng,lsxp-Wo_ben_shi-Kong_cheng_ji-qm,,
23 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm,,
24 | part1,,laosheng,lsxp-Xi_ri_you-Zhu_lian_zhai-qm,,
25 | part1,,danAll,daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon,,
26 | part1,,danAll,daspd-Du_shou_kong-Wang_jiang_ting-upf,,
27 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon,,
28 | part1,,danAll,daxp-Guan_Shi_yin-Tian_nv_san_hua-lon,,
29 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf,,
30 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong01-lon,,
31 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying01-lon,,
32 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-1-upf,,
33 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-2-upf,,
34 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf,,
35 | part2,,20170327LiaoJiaNi,lsxp-Yi_ma_li-Wu_jia_po-nacta,,
36 | part2,,20170327LiaoJiaNi,lseh-Niang_zi_bu-Sou_gu_jiu-nacta,,
37 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,teacher,,
38 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,teacher,,
39 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,teacher,,
40 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,teacher,,
41 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,teacher,,
42 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,teacher,,
43 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,teacher,,
44 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,teacher,,
45 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,teacher,,
46 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,teacher,,
47 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,teacher,,
48 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,teacher,,
49 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,teacher,,
50 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student01,,
51 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02_first_half,,
52 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02,,
53 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student03,,
54 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student04,,
55 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student05,,
56 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student06,,
57 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student01,,
58 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student02,,
59 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student03,,
60 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student04,,
61 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student05,,
62 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student06,,
63 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_01,,
64 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_02,,
65 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_03,,
66 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,student_01,,
67 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_01,,
68 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_03,,
69 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_01,,
70 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_02,,
71 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_01,,
72 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_02,,
73 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,student_01_sizhu,,


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/parseLang.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | from kaldi_alignment.srcPy.filePath import *
  4 | from kaldi_alignment.srcPy.textgridParser import syllableTextgridExtraction
  5 | 
  6 | 
  7 | def collectLexicon(path_textgrid, recording, tier0, tier1, lexicon):
  8 |     nestedSyllableLists, numLines, numSyllables = syllableTextgridExtraction(path_textgrid,
  9 |                                                                              recording,
 10 |                                                                              tier0,
 11 |                                                                              tier1)
 12 |     for syl in nestedSyllableLists:
 13 |         lexicon_unit = syl[0][2].strip().upper()+' '
 14 |         for ii_pho, pho in enumerate(syl[1]):
 15 |             # if pho[2] == '?':
 16 |             # 	continue
 17 |             if not len(pho[2]) and ii_pho >= len(syl[1])-1:
 18 |                 lexicon_unit = lexicon_unit[:-1]  # remove the last space
 19 |                 break
 20 |             if len(pho[2]):
 21 |                 lexicon_unit += pho[2]
 22 |             else:
 23 |                 lexicon_unit += 'sil_phone'
 24 |             if ii_pho != len(syl[1])-1:
 25 |                 lexicon_unit += ' '  # add a space in the end of the char
 26 |         lexicon.append(lexicon_unit)
 27 |     return lexicon
 28 | 
 29 | 
 30 | def collect_lexicon_syllable_special(path_textgrid, recording, syllable_tier, special_tier, phoneme_tier, lexicon):
 31 |     nestedSyllableLists, numLines, numSyllables = syllableTextgridExtraction(path_textgrid,
 32 |                                                                              recording,
 33 |                                                                              syllable_tier,
 34 |                                                                              phoneme_tier)
 35 | 
 36 |     nestedSpecialLists, numLines, numSpecial = syllableTextgridExtraction(path_textgrid,
 37 |                                                                           recording,
 38 |                                                                           special_tier,
 39 |                                                                           phoneme_tier)
 40 |     for ii_syl, syl in enumerate(nestedSyllableLists):
 41 |         lexicon_unit = syl[0][2].strip().upper() + ' ' + nestedSpecialLists[ii_syl][0][2].strip().upper() + ' '
 42 |         for ii_pho, pho in enumerate(syl[1]):
 43 |             # if pho[2] == '?':
 44 |             # 	continue
 45 |             if not len(pho[2]) and ii_pho >= len(syl[1])-1:
 46 |                 lexicon_unit = lexicon_unit[:-1]  # remove the last space
 47 |                 break
 48 |             if len(pho[2]):
 49 |                 lexicon_unit += pho[2]
 50 |             else:
 51 |                 lexicon_unit += 'sil_phone'
 52 |             if ii_pho != len(syl[1])-1:
 53 |                 lexicon_unit += ' '  # add a space in the end of the char
 54 |         lexicon.append(lexicon_unit)
 55 |     return lexicon
 56 | 
 57 | 
 58 | def organizeRepetition(lexicon, repetition=False):
 59 |     """
 60 |     give the syllable repetition different name
 61 |     """
 62 |     names_syllable 		= {}
 63 |     lexicon_organized 	= []
 64 |     dict_lexicon_organized = {}
 65 |     for l in lexicon:
 66 |         syls = l.split()
 67 | 
 68 |         if repetition:
 69 |             if syls[0] not in names_syllable.keys():
 70 |                 names_syllable[syls[0]] = 0
 71 |             else:
 72 |                 names_syllable[syls[0]] += 1
 73 | 
 74 |             syls[0] = syls[0]+str(names_syllable[syls[0]])
 75 | 
 76 |         lexicon_unit = ' '.join(syls)
 77 | 
 78 |         # remove repetition
 79 |         if lexicon_unit not in lexicon_organized:
 80 |             lexicon_organized.append(lexicon_unit)
 81 |             dict_lexicon_organized[syls[0]] = syls[1:]
 82 |     return lexicon_organized, dict_lexicon_organized
 83 | 
 84 | 
 85 | def organize_repetition_syllable_special(lexicon, repetition=False):
 86 |     """
 87 |     give the syllable repetition different name, lexicon contains syllable and special
 88 |     """
 89 |     names_syllable 		= {}
 90 |     lexicon_organized 	= []
 91 |     dict_lexicon_organized = {}
 92 | 
 93 |     for l in lexicon:
 94 |         print(l)
 95 |         syls = l[0].split()
 96 | 
 97 |         if repetition:
 98 |             if syls[0] not in names_syllable.keys():
 99 |                 names_syllable[syls[0]] = 0
100 |             else:
101 |                 names_syllable[syls[0]] += 1
102 | 
103 |             syls[0] = syls[0]+str(names_syllable[syls[0]])
104 | 
105 |         lexicon_unit = [' '.join(syls), l[1:]]
106 | 
107 |         # remove repetition
108 |         if lexicon_unit[0] not in lexicon_organized:
109 |             lexicon_organized.append(lexicon_unit[0])
110 |             dict_lexicon_organized[syls[0]] = [syls[1:], l[1:]]
111 |     return lexicon_organized, dict_lexicon_organized
112 | 
113 | 
def writeLexicon(path_lang, lexicon, repetition=False):
    """
    Write a lexicon text file into ``path_lang``.

    The file is named 'lexicon.txt' when ``repetition`` is true and
    'lexicon_no_rep.txt' otherwise.  It starts with the two fixed entries
    'SIL sil' and '<SPOKEN_NOISE> sil', followed by one line per element of
    ``lexicon``.

    :param path_lang: output directory
    :param lexicon: iterable of entry strings, written one per line
    :param repetition: selects the output file name
    """
    filename_lexicon = 'lexicon.txt' if repetition else 'lexicon_no_rep.txt'

    # (the SIL_PHONE / SILDAN entries were disabled in the original code)
    fixed_entries = ['SIL sil\n', '<SPOKEN_NOISE> sil\n']

    with open(os.path.join(path_lang, filename_lexicon), "w") as f:
        f.writelines(fixed_entries)
        f.writelines(entry + '\n' for entry in lexicon)
129 | 
130 | 
if __name__ == '__main__':

    # 'train': build the lexicon with numbered repetitions and write it out.
    # anything else: build the syllable/special dictionary for the test set.
    train_test = 'test'

    lexicon = []
    all_recordings = recordings_train + recordings_test

    if train_test == 'train':

        for rec in all_recordings:
            (data_path, sub_folder, textgrid_folder,
             wav_folder, filename, line_tier, longsyllable_tier, syllable_tier,
             phoneme_tier, special_tier, special_class_tier, roletype) = parse_recordings(rec)

            textgrid_dir = os.path.join(path_root, data_path, textgrid_folder, sub_folder)

            lexicon = collectLexicon(path_textgrid=textgrid_dir,
                                     recording=filename,
                                     tier0=special_tier,
                                     tier1=phoneme_tier,
                                     lexicon=lexicon)

        # drop exact duplicates collected across recordings
        lexicon = list(set(lexicon))

        lexicon_organized, dict_lexicon_organized = organizeRepetition(lexicon, repetition=True)

        writeLexicon(path_lang, lexicon_organized, repetition=True)

        with open(os.path.join(path_lang, "dict_lexicon_repetition.json"), "w") as write_file:
            json.dump(dict_lexicon_organized, write_file)
    else:
        lexicon_special = []
        lexicon_syllable_special = []

        for rec in all_recordings:
            (data_path, sub_folder, textgrid_folder,
             wav_folder, filename, line_tier, longsyllable_tier, syllable_tier,
             phoneme_tier, special_tier, special_class_tier, roletype) = parse_recordings(rec)

            # the same TextGrid directory is parsed three times below
            textgrid_dir = os.path.join(path_root, data_path, textgrid_folder, sub_folder)

            # 'SYL phn0 phn1 ...'
            lexicon = collectLexicon(path_textgrid=textgrid_dir,
                                     recording=filename,
                                     tier0=syllable_tier,
                                     tier1=phoneme_tier,
                                     lexicon=lexicon)

            # 'SPECIAL phn0 phn1 ...'
            lexicon_special = collectLexicon(path_textgrid=textgrid_dir,
                                             recording=filename,
                                             tier0=special_tier,
                                             tier1=phoneme_tier,
                                             lexicon=lexicon_special)

            # 'SYL SPECIAL phn0 phn1 ...'
            lexicon_syllable_special = collect_lexicon_syllable_special(path_textgrid=textgrid_dir,
                                                                        recording=filename,
                                                                        syllable_tier=syllable_tier,
                                                                        special_tier=special_tier,
                                                                        phoneme_tier=phoneme_tier,
                                                                        lexicon=lexicon_syllable_special)

        # drop exact duplicates collected across recordings
        lexicon = list(set(lexicon))
        lexicon_special = list(set(lexicon_special))
        lexicon_syllable_special = list(set(lexicon_syllable_special))

        # build one list per special pronunciation:
        # ['SPECIAL phn0 phn1 phn2', 'SYL0', 'SYL1', ...] where the SYLs are
        # the syllables whose 'SPECIAL phn...' remainder equals that entry
        split_entries = [word_entry.split(' ') for word_entry in lexicon_syllable_special]
        lexicon_remove_rep = [
            [pron_special] + [fields[0] for fields in split_entries
                              if pron_special == ' '.join(fields[1:])]
            for pron_special in lexicon_special
        ]

        lexicon_organized, dict_lexicon_organized = organizeRepetition(lexicon, repetition=False)

        # dict_lexicon_organized_syllable_special,
        # {SPECIAL<index>: [[phn0, phn1, phn2], [SYL0, SYL1]]}
        lexicon_organized_syllable_special, dict_lexicon_organized_syllable_special = \
            organize_repetition_syllable_special(lexicon_remove_rep, repetition=True)

        with open(os.path.join(path_lang, "dict_lexicon_repetition_syllable_special.json"), "w") as write_file:
            json.dump(dict_lexicon_organized_syllable_special, write_file)


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/parse_decoded_pronunciation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | write the decoded text for test set
 3 | """
 4 | import json
 5 | from kaldi_alignment.srcPy.filePath import *
 6 | 
 7 | 
 8 | def open_decoded_pronunciation(filename):
 9 |     utt = []
10 |     with open(filename) as file:
11 |         for row in file.readlines():
12 |             utt.append(row.replace('<s>\t', '').replace('\t</s>\n', ''))
13 |     return utt
14 | 
15 | 
def parse_lexicon_to_list(lexicon):
    """
    Parse a lexicon text file into [word, phonemes] pairs.

    Each line is stripped of its newline and split on single spaces; the
    first field is the word, the remaining fields form the phoneme list.

    :param lexicon: path of the lexicon file
    :return: list of [word, [phn0, phn1, ...]], one per line
    """
    list_lexicon = []
    with open(lexicon) as lexicon_file:
        for line in lexicon_file:
            fields = line.replace('\n', '').split(' ')
            list_lexicon.append([fields[0], fields[1:]])
    return list_lexicon
23 | 
24 | 
def lexicon_finder(dict_lexicon_organized, pho_list):
    """
    Return the first key of dict_lexicon_organized whose value equals pho_list.

    :param dict_lexicon_organized: mapping from word entry to phoneme list
    :param pho_list: phoneme list to look up
    :return: the matching key (first one in the mapping's iteration order)
    :raises ValueError: when no value equals pho_list
    """
    not_found = object()  # sentinel, so that any key value can be returned
    match = next((syl_organized
                  for syl_organized, dict_pho_list in dict_lexicon_organized.items()
                  if pho_list == dict_pho_list),
                 not_found)

    if match is not_found:
        raise ValueError("Not found word entry for {}".format(pho_list))
    return match
34 | 
35 | 
if __name__ == "__main__":

    path_test_ali = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/exp/mono_test_ali/"
    path_lang_test = "/Users/ronggong/PycharmProjects/mispronunciation-detection/kaldi_alignment/data/dict_test"
    filename_decoded_pronunciation = os.path.join(path_test_ali, "pron_perutt_nowb.txt")

    # parsed but not used further below
    list_lexicon = parse_lexicon_to_list(os.path.join(path_lang_test, 'lexicon.txt'))

    # {SPECIAL<index>: [[phn0, phn1, ...], [SYL0, SYL1, ...]]}
    with open(os.path.join(path_lang, "dict_lexicon_repetition_syllable_special.json"), "r") as read_file:
        dict_lexicon = json.load(read_file)

    utts = open_decoded_pronunciation(filename_decoded_pronunciation)

    with open(os.path.join(path_test_ali, 'text_decoded'), "w") as f:
        for utt in utts:
            fields = utt.split('\t')
            # the first field is copied through unchanged
            utt_organized = [fields[0]]

            for decoded_word in fields[1:]:
                if decoded_word == 'SIL sil':
                    continue

                tokens = decoded_word.split(' ')
                syl = tokens[0]
                pron_decoded = tokens[1:]

                # every special pronunciation that lists this syllable
                candidates = [(special, pron_syl[0])
                              for special, pron_syl in dict_lexicon.items()
                              if syl in pron_syl[1]]

                # keep the first candidate whose phonemes equal the decoded
                # ones, written without the digits of its key; a word
                # without any match contributes nothing to the output line
                for special, pron in candidates:
                    if pron_decoded == pron:
                        utt_organized.append(''.join(ch for ch in special if not ch.isdigit()))
                        break

            f.write(' '.join(utt_organized) + '\n')
72 | 


--------------------------------------------------------------------------------
/kaldi_alignment/srcPy/textgridParser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | 
  4 | import kaldi_alignment.srcPy.textgrid as tgp
  5 | 
  6 | 
  7 | def textGrid2WordList(textgrid_file, whichTier = 'pinyin', utf16 = True):
  8 |     '''
  9 |     parse textGrid into a python list of tokens 
 10 |     @param whichTier : 'pinyin' default tier name  
 11 |     '''	
 12 |     if not os.path.isfile(textgrid_file): raise Exception("file {} not found".format(textgrid_file))
 13 |     beginTsAndWordList = []
 14 | 
 15 |     if utf16:
 16 |         par_obj = tgp.TextGrid.loadUTF16(textgrid_file)	#loading the object
 17 |     else:
 18 |         par_obj = tgp.TextGrid.load(textgrid_file)	#loading the object
 19 | 
 20 |     tiers= tgp.TextGrid._find_tiers(par_obj)	#finding existing tiers		
 21 | 	
 22 |     isTierFound = False
 23 |     for tier in tiers:
 24 |         tierName= tier.tier_name().replace('.','')
 25 |         #iterating over tiers and selecting the one specified
 26 |         if tierName == whichTier:
 27 |             isTierFound = True
 28 |             #this function parse the file nicely and return cool tuples
 29 |             tier_details = tier.make_simple_transcript()
 30 | 
 31 |             for line in tier_details:
 32 |                 beginTsAndWordList.append([float(line[0]), float(line[1]), line[2]])
 33 | 
 34 |     if not isTierFound:
 35 |         print('Missing tier {1} in file {0}' .format(textgrid_file, whichTier))
 36 | 
 37 |     return beginTsAndWordList, isTierFound
 38 | 
 39 | 
 40 | def line2WordList(line, entireWordList):
 41 |     '''
 42 |     find the nested wordList of entireWordList by line tuple
 43 |     :param line: line tuple [startTime, endTime, string]
 44 |     :param entireWordList: entire word list
 45 |     :return: nested wordList
 46 |     '''
 47 |     nestedWordList = []
 48 |     vault = False
 49 |     for wordlist in entireWordList:
 50 |          # the ending of the line
 51 |         if wordlist[1] == line[1]:
 52 |             nestedWordList.append(wordlist)
 53 |             break
 54 |         # the beginning of the line
 55 |         if wordlist[0] == line[0]:
 56 |             vault = True
 57 |         if vault == True:
 58 |             nestedWordList.append(wordlist)
 59 | 
 60 |     return nestedWordList
 61 | 
 62 | 
 63 | def wordListsParseByLines(entireLine, entireWordList):
 64 |     '''
 65 |     find the wordList for each line, cut the word list according to line
 66 |     :param entireLine: entire lines in line tier
 67 |     :param entirewWordList: entire word lists in pinyin tier
 68 |     :return:
 69 |     nestedWordLists: [[line0, wordList0], [line1, wordList1], ...]
 70 |     numLines: sum of number of lines
 71 |     numWords: sum of number of words
 72 |     '''
 73 |     nestedWordLists     = []
 74 |     numLines            = 0
 75 |     numWords            = 0
 76 | 
 77 |     for line in entireLine:
 78 |         # asciiLine=line[2].encode("ascii", "replace")
 79 |         asciiLine = line[2]
 80 |         if len(asciiLine.replace(" ", "")):                                      # if line is not empty
 81 |             numLines        += 1
 82 |             nestedWordList  = []
 83 |             wordList        = line2WordList(line, entireWordList)
 84 |             for word in wordList:
 85 |                 # asciiWord = word[2].encode("ascii", "replace")
 86 |                 asciiWord = word[2]
 87 |                 # if len(asciiWord.replace(" ","")):                              # if word is not empty
 88 |                 numWords += 1
 89 |                 nestedWordList.append(word)
 90 |             nestedWordLists.append([line,nestedWordList])
 91 | 
 92 |     return nestedWordLists, numLines, numWords
 93 | 
 94 | 
 95 | def syllableTextgridExtraction(textgrid_path, recording, tier0, tier1):
 96 | 
 97 |     '''
 98 |     Extract syllable boundary and phoneme boundary from textgrid
 99 |     :param textgrid_path:
100 |     :param recording:
101 |     :param tier0: parent tier
102 |     :param tier1: child tier which should be covered by parent tier
103 |     :return:
104 |     nestedPhonemeList, element[0] - syllable, element[1] - a list containing the phoneme of the syllable
105 |     '''
106 | 
107 |     print(textgrid_path, recording)
108 |     textgrid_file   = os.path.join(textgrid_path, recording+'.TextGrid')
109 | 
110 |     syllableList, _    = textGrid2WordList(textgrid_file, whichTier=tier0)
111 |     phonemeList, _     = textGrid2WordList(textgrid_file, whichTier=tier1)
112 | 
113 |     # parse syllables of groundtruth
114 |     nestedPhonemeLists, numSyllables, numPhonemes   = wordListsParseByLines(syllableList, phonemeList)
115 | 
116 |     return nestedPhonemeLists, numSyllables, numPhonemes
117 | 
118 | 


--------------------------------------------------------------------------------
/neural_net/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/__init__.py


--------------------------------------------------------------------------------
/neural_net/combine_feature_label.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import numpy as np
 3 | from neural_net.file_path import *
 4 | 
 5 | 
 6 | def combine_feature_label(dict_positive, dict_negative):
 7 |     """
 8 |     Combine positive and negative features and labels into two lists
 9 |     :param dict_positive:
10 |     :param dict_negative:
11 |     :return:
12 |     """
13 |     X = []
14 |     y = []
15 |     for key in dict_positive:
16 |         X += dict_positive[key]
17 |         y += [1]*len(dict_positive[key])
18 | 
19 |     for key in dict_negative:
20 |         X += dict_negative[key]
21 |         y += [0]*len(dict_negative[key])
22 | 
23 |     return X, np.array(y)
24 | 
25 | 
if __name__ == "__main__":
    # NOTE: pickle.load executes whatever the file encodes; only use it on
    # feature files produced by this project.
    loaded = []
    for pickle_path in (dict_special_positive,
                        dict_special_negative,
                        dict_jianzi_positive,
                        dict_jianzi_negative):
        with open(pickle_path, "rb") as f:
            loaded.append(pickle.load(f))

    feature_special_pos, feature_special_neg, feature_jianzi_pos, feature_jianzi_neg = loaded

    X_special, y_special = combine_feature_label(dict_positive=feature_special_pos,
                                                 dict_negative=feature_special_neg)

    X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos,
                                               dict_negative=feature_jianzi_neg)

    # number of positive samples and total number of samples per task
    print(np.count_nonzero(y_special), len(y_special))
    print(np.count_nonzero(y_jianzi), len(y_jianzi))


--------------------------------------------------------------------------------
/neural_net/data/mispronunciation_filelist_test.csv:
--------------------------------------------------------------------------------
 1 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student01,,
 2 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student02,,
 3 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student03,,
 4 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,student04,,
 5 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student01,,
 6 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student02,,
 7 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student03,,
 8 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student04,,
 9 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student05,,
10 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student06,,
11 | part3,20171211SongRuoXuan,daxp-Meng_ting_de-Mu_gui_ying_gua_shuai-dxjky,student07,,
12 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_01,,
13 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_02,,
14 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_03,,
15 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_04,,
16 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,student_05,,
17 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_02,,
18 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_01,,
19 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_02,,
20 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_03,,
21 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_04,,
22 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_05,,
23 | part3,20171215SongRuoXuan,daxp-Jiao_zhang_sheng-Xi_shi-qianmen,student_06,,
24 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_01,,
25 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_02,,
26 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,student_04_mentougou,,
27 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_01,,
28 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_02,,
29 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,student_03,,
30 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_dxjky,,
31 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_01_sizhu,,
32 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_02_dxjky,,
33 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_03_dxjky,,
34 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_04_dxjky,,
35 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_05_dxjky,,
36 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,student_06_mentougou,,


--------------------------------------------------------------------------------
/neural_net/data/mispronunciation_filelist_train.csv:
--------------------------------------------------------------------------------
 1 | part1,,danAll,dafeh-Bi_yun_tian-Xi_xiang_ji01-qm,,
 2 | part1,,danAll,danbz-Bei_jiu_chan-Chun_gui_men01-qm,,
 3 | part1,,danAll,danbz-Kan_dai_wang-Ba_wang_bie_ji01-qm,,
 4 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu02-qm,,
 5 | part1,,danAll,daxp-Chun_qiu_ting-Suo_lin_nang01-qm,,
 6 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang01-qm,,
 7 | part1,,danAll,daxp-Jiao_Zhang_sheng-Hong_niang04-qm,,
 8 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai02-qm,,
 9 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai04-qm,,
10 | part1,,danAll,daxp-Zhe_cai_shi-Suo_lin_nang01-qm,,
11 | part1,,laosheng,lseh-Tan_Yang_jia-Hong_yang_dong-qm,,
12 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong02-qm,,
13 | part1,,laosheng,lseh-Wo_ben_shi-Qiong_lin_yan-qm,,
14 | part1,,laosheng,lseh-Yi_lun_ming-Wen_zhao_guan-qm,,
15 | part1,,laosheng,lseh-Zi_na_ri-Hong_yang_dong-qm,,
16 | part1,,laosheng,lsxp-Guo_liao_yi-Wen_zhao_guan02-qm,,
17 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying02-qm,,
18 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian02-qm,,
19 | part1,,laosheng,lsxp-Qian_bai_wan-Si_lang_tang_mu01-qm,,
20 | part1,,laosheng,lsxp-Quan_qian_sui-Gan_lu_si-qm,,
21 | part1,,laosheng,lsxp-Shi_ye_shuo-Ding_jun_shan-qm,,
22 | part1,,laosheng,lsxp-Wo_ben_shi-Kong_cheng_ji-qm,,
23 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji04-qm,,
24 | part1,,laosheng,lsxp-Xi_ri_you-Zhu_lian_zhai-qm,,
25 | part1,,danAll,daeh-Yang_Yu_huan-Tai_zhen_wai_zhuan-lon,,
26 | part1,,danAll,daspd-Du_shou_kong-Wang_jiang_ting-upf,,
27 | part1,,danAll,daspd-Hai_dao_bing-Gui_fei_zui_jiu01-lon,,
28 | part1,,danAll,daxp-Guan_Shi_yin-Tian_nv_san_hua-lon,,
29 | part1,,danAll,daxp-Meng_ting_de-Mu_Gui_ying_gua_shuai01-upf,,
30 | part1,,laosheng,lseh-Wei_guo_jia-Hong_yang_dong01-lon,,
31 | part1,,laosheng,lsxp-Huai_nan_wang-Huai_he_ying01-lon,,
32 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-1-upf,,
33 | part1,,laosheng,lsxp-Jiang_shen_er-San_jia_dian01-2-upf,,
34 | part1,,laosheng,lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf,,
35 | part2,,20170327LiaoJiaNi,lsxp-Yi_ma_li-Wu_jia_po-nacta,,
36 | part2,,20170327LiaoJiaNi,lseh-Niang_zi_bu-Sou_gu_jiu-nacta,,
37 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,teacher,,
38 | part3,20171211SongRuoXuan,daxp-Fei_shi_wo-Hua_tian_cuo-dxjky,teacher,,
39 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,teacher,,
40 | part3,20171214SongRuoXuan,daeh-Yang_yu_huan-Tai_zhen_wai_zhuan-nanluo,teacher,,
41 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,teacher,,
42 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,teacher,,
43 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,teacher,,
44 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,teacher,,
45 | part3,20171217TianHao,lsxp-Jiang_shen_er-San_jia_dian-sizhu,teacher,,
46 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,teacher,,
47 | part3,20171217TianHao,lsxp-Wei_guo_jia-Hong_yang_dong-sizhu,teacher,,
48 | part3,2017121718SongRuoXuan,lsxp-Zhe_yi_feng-Ding_jun_shan-dxjky-sizhu,teacher,,
49 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,teacher,,
50 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student01,,
51 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02_first_half,,
52 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student02,,
53 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student03,,
54 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student04,,
55 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student05,,
56 | part3,20171211SongRuoXuan,daxp_Qing_zao_qi_lai-Mai_shui-dxjky,student06,,
57 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student01,,
58 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student02,,
59 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student03,,
60 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student04,,
61 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student05,,
62 | part3,20171211SongRuoXuan,daxp-Wo_jia_di-Hong_deng_ji-dxjky,student06,,
63 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_01,,
64 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_02,,
65 | part3,20171214SongRuoXuan,danbz-Kan_dai_wang-Ba_wang_bie_ji-nanluo,student_03,,
66 | part3,20171214SongRuoXuan,daspd-Hai_dao_bing-Gui_fei_zui_jiu-nanluo,student_01,,
67 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_01,,
68 | part3,20171214SongRuoXuan,daxp-Quan_jun_wang-Ba_wang_bie_ji-nanluo,student_03,,
69 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_01,,
70 | part3,20171217TianHao,lseh-Wo_men_shi-Zhi_qu-sizhu,student_02,,
71 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_01,,
72 | part3,20171217TianHao,lsxp-Lin_xing_he_ma-Hong_deng_ji-sizhu,student_02,,
73 | part3,20171217TianHao,lsxp-Ti_lan_xiao_mai-Hong_deng_ji-sizhu_mentougou,student_01_sizhu,,


--------------------------------------------------------------------------------
/neural_net/data/normal_jianzi.json:
--------------------------------------------------------------------------------
1 | ["xiu", "jue", "xian", "xiang", "zheng", "xin", "qiu", "qie", "chu", "qing", "ji", "xiao", "qian", "xi"]


--------------------------------------------------------------------------------
/neural_net/data/normal_special.json:
--------------------------------------------------------------------------------
1 | ["na", "ai", "ri", "zhi", "ru", "shuo", "zhuang", "ming", "bei", "peng", "nei", "lai", "bai", "chang", "he", "cheng", "lei", "neng", "zhu", "jing", "shi", "lv", "me", "ding", "sheng", "wu", "xing", "qing", "hai", "shu", "ting", "ping", "meng", "ge", "quan", "an", "zheng", "bing", "ying", "e", "wo", "jie", "chu", "ke", "ceng", "que", "ji", "mao", "zei", "chun", "ling", "yuan", "fei", "ning", "deng", "zeng", "mai", "xie", "zhan", "zhao", "feng", "huai", "luo", "zhe"]


--------------------------------------------------------------------------------
/neural_net/file_path.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from neural_net.utils.csv_preprocessing import open_csv_recordings
 3 | 
# Directory containing this module; the data/, model/ and results/ locations
# below are all resolved relative to it.
dir_path = os.path.dirname(os.path.realpath(__file__))

# NOTE(review): machine-specific absolute path -- presumably the root folder of
# the audio datasets on the original author's machine; adjust before running
# elsewhere.
path_root = '/Users/ronggong/Documents_using/MTG_document/Jingju_arias/'

# Dataset folder names; parse_recordings() selects one of them from the part
# label stored in the first field of a recording entry.
path_nacta = 'jingju_a_cappella_singing_dataset'
path_nacta2017 = 'jingju_a_cappella_singing_dataset_extended_nacta2017'
path_primary = 'primary_school_recording'

# Train/test recording lists, read from the CSV file lists when this module is
# imported.
recordings_train = open_csv_recordings(os.path.join(dir_path, "data/mispronunciation_filelist_train.csv"))
recordings_test = open_csv_recordings(os.path.join(dir_path, "data/mispronunciation_filelist_test.csv"))

# JSON files holding lists of syllable strings.
filename_normal_special = os.path.join(dir_path, "data/normal_special.json")
filename_normal_jianzi = os.path.join(dir_path, "data/normal_jianzi.json")

# Pickle file paths for the positive/negative "special" and "jianzi" samples.
dict_special_positive = os.path.join(dir_path, "data/special_positive.pkl")
dict_special_negative = os.path.join(dir_path, "data/special_negative.pkl")
dict_jianzi_positive = os.path.join(dir_path, "data/jianzi_positive.pkl")
dict_jianzi_negative = os.path.join(dir_path, "data/jianzi_negative.pkl")

# Folder of the segmentation model.
joint_cnn_model_path = os.path.join(dir_path, 'model', 'segmentation')

# Active model files; the commented-out lines are alternative model variants
# that can be switched in by hand.
filename_special_model = os.path.join(dir_path, "model", "special_model_prod_True_True_0.5.h5")
filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_True_True_0.5.h5")
# filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_feedforward_True_0.5.h5")

# filename_special_model = os.path.join(dir_path, "model", "special_model_prod_tcn_0.05.h5")
# filename_jianzi_model = os.path.join(dir_path, "model", "jianzi_model_prod_tcn_0.05.h5")

# Output path of the decoded result; keep its suffix in sync with the model
# variant selected above.
filename_result_decoded_mispronunciaiton = os.path.join(dir_path, "results", "text_decoded_special_True_True_0.5")
# filename_result_decoded_mispronunciaiton = os.path.join(dir_path, "results", "text_decoded_special_feedforward_True_0.5")

# NOTE(review): machine-specific absolute path for the jianzi figures.
path_figs_jianzi = "/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/figs/jianzi"
36 | 
37 | 
def getRecordings(wav_path):
    """Return the extension-less names of every file found below ``wav_path``.

    The directory tree is walked recursively with ``os.walk``. Files whose
    name without extension equals ``'.DS_Store'`` are left out. Names are
    returned in the order ``os.walk`` yields them.

    :param wav_path: root directory to scan
    :return: list of file names with their extension removed
    """
    stems = (
        os.path.splitext(name)[0]
        for _dirpath, _dirnames, names in os.walk(wav_path)
        for name in names
    )
    return [stem for stem in stems if stem != '.DS_Store']
47 | 
48 | 
def parse_recordings(rec):
    """Translate one recording entry into its dataset layout description.

    ``rec`` is indexed positionally: ``rec[0]`` is the part label
    (``"part1"``, ``"part2"``, anything else is treated as the primary-school
    data), ``rec[3]`` is the file name. The role-type is derived from the first
    two characters of ``rec[3]`` for part1/part2 and of ``rec[2]`` otherwise
    (``'da'`` -> ``'Dan'``, ``'ls'`` -> ``'Laosheng'``).

    :param rec: indexable recording entry with at least four fields
    :return: tuple ``(data_path, sub_folder, textgrid_folder, wav_folder,
        filename, line_tier, longsyllable_tier, syllable_tier, phoneme_tier,
        special_tier, special_class_tier, roletype)``
    :raises ValueError: if the two-character role prefix is neither ``'da'``
        nor ``'ls'``
    """
    part = rec[0]
    if part == "part1":
        data_path, sub_folder = path_nacta, rec[2]
        textgrid_folder, wav_folder, syllable_tier = "textgrid", "wav_left", "dian"
        role_code = rec[3][:2]
    elif part == "part2":
        data_path, sub_folder = path_nacta2017, rec[2]
        textgrid_folder, wav_folder, syllable_tier = "textgridDetails", "wav", "dianSilence"
        role_code = rec[3][:2]
    else:
        data_path, sub_folder = path_primary, rec[1] + "/" + rec[2]
        textgrid_folder, wav_folder, syllable_tier = "textgrid", "wav_left", "dianSilence"
        # For this layout the role prefix lives in the aria name, not the file name.
        role_code = rec[2][:2]

    # The role check happens before the file name is read, as in every branch
    # of the layout selection above.
    if role_code == 'da':
        roletype = 'Dan'
    elif role_code == 'ls':
        roletype = 'Laosheng'
    else:
        raise ValueError("Not exist a role-type {} for file {}".format(role_code, rec))

    return (data_path, sub_folder, textgrid_folder,
            wav_folder, rec[3], "line",
            "longsyllable", syllable_tier, "details",
            "special", "specialClass", roletype)


--------------------------------------------------------------------------------
/neural_net/keras_tcn/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | .idea/
  6 | .DS_Store
  7 | 
  8 | *.tsv
  9 | *.tar.gz
 10 | *out*
 11 | credentials.json
 12 | 
 13 | *.json
 14 | 
 15 | nohup.out
 16 | *.out
 17 | *.txt
 18 | 
 19 | # C extensions
 20 | *.so
 21 | 
 22 | # Distribution / packaging
 23 | .Python
 24 | env/
 25 | build/
 26 | develop-eggs/
 27 | dist/
 28 | downloads/
 29 | eggs/
 30 | .eggs/
 31 | lib/
 32 | lib64/
 33 | parts/
 34 | sdist/
 35 | var/
 36 | wheels/
 37 | *.egg-info/
 38 | .installed.cfg
 39 | *.egg
 40 | 
 41 | # PyInstaller
 42 | #  Usually these files are written by a python script from a template
 43 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 44 | *.manifest
 45 | *.spec
 46 | 
 47 | # Installer logs
 48 | pip-log.txt
 49 | pip-delete-this-directory.txt
 50 | 
 51 | # Unit test / coverage reports
 52 | htmlcov/
 53 | .tox/
 54 | .coverage
 55 | .coverage.*
 56 | .cache
 57 | nosetests.xml
 58 | coverage.xml
 59 | *.cover
 60 | .hypothesis/
 61 | 
 62 | # Translations
 63 | *.mo
 64 | *.pot
 65 | 
 66 | # Django stuff:
 67 | *.log
 68 | local_settings.py
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | 
 80 | # PyBuilder
 81 | target/
 82 | 
 83 | # Jupyter Notebook
 84 | .ipynb_checkpoints
 85 | 
 86 | # pyenv
 87 | .python-version
 88 | 
 89 | # celery beat schedule file
 90 | celerybeat-schedule
 91 | 
 92 | # SageMath parsed files
 93 | *.sage.py
 94 | 
 95 | # dotenv
 96 | .env
 97 | 
 98 | # virtualenv
 99 | .venv
100 | venv/
101 | ENV/
102 | 
103 | # Spyder project settings
104 | .spyderproject
105 | .spyproject
106 | 
107 | # Rope project settings
108 | .ropeproject
109 | 
110 | # mkdocs documentation
111 | /site
112 | 
113 | # mypy
114 | .mypy_cache/
115 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Philippe Rémy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/README.md:
--------------------------------------------------------------------------------
  1 | # Keras TCN
  2 | *Keras Temporal Convolutional Network*
  3 | 
  4 |  * [Keras TCN](#keras-tcn)
  5 |     * [Why Temporal Convolutional Network?](#why-temporal-convolutional-network)
  6 |     * [API](#api)
  7 |        * [Regression (Many to one) e.g. adding problem](#--regression-many-to-one-eg-adding-problem)
  8 |        * [Classification (Many to many) e.g. copy memory task](#--classification-many-to-many-eg-copy-memory-task)
  9 |        * [Classification (Many to one) e.g. sequential mnist task](#--classification-many-to-one-eg-sequential-mnist-task)
 10 |     * [Installation](#installation)
 11 |     * [Run](#run)
 12 |     * [Tasks](#tasks)
 13 |     * [References](#references)
 14 | 
 15 | ## Why Temporal Convolutional Network?
 16 | 
 17 | - TCNs exhibit longer memory than recurrent architectures with the same capacity.
 18 | - TCNs consistently perform better than LSTM/GRU architectures on a wide range of tasks (Seq. MNIST, Adding Problem, Copy Memory, Word-level PTB...).
 19 | - Parallelism, flexible receptive field size, stable gradients, low memory requirements for training, variable length inputs...
 20 | 
 21 | <p align="center">
 22 |   <img src="misc/Dilated_Conv.png">
 23 |   <b>Visualization of a stack of dilated causal convolutional layers (Wavenet, 2016)</b><br><br>
 24 | </p>
 25 | 
 26 | ## API
 27 | 
 28 | After installation, the model can be imported like this:
 29 | 
 30 | ```
 31 | from tcn import tcn
 32 | ```
 33 | 
 34 | In the following examples, we assume the input to have a shape `(batch_size, timesteps, input_dim)`.
 35 | 
 36 | The model is a Keras model. The model functions (`model.summary`, `model.fit`, `model.predict`...) are all functional.
 37 | 
 38 | 
 39 | 
 40 | ### - Regression (Many to one) e.g. adding problem
 41 | 
 42 | ```
 43 | model = tcn.dilated_tcn(output_slice_index='last',
 44 |                         num_feat=input_dim,
 45 |                         num_classes=None,
 46 |                         nb_filters=24,
 47 |                         kernel_size=8,
 48 |                         dilatations=[1, 2, 4, 8],
 49 |                         nb_stacks=8,
 50 |                         max_len=timesteps,
 51 |                         activation='norm_relu',
 52 |                         regression=True)
 53 | ```
 54 | 
 55 | For a Many to Many regression, a cheap fix for now is to change the [number of units of the final Dense layer](https://github.com/philipperemy/keras-tcn/blob/8151b4a87f906fd856fd1c113c48392d542d0994/tcn/tcn.py#L90).
 56 | 
 57 | ### - Classification (Many to many) e.g. copy memory task
 58 | 
 59 | ```
 60 | model = tcn.dilated_tcn(num_feat=input_dim,
 61 |                         num_classes=10,
 62 |                         nb_filters=10,
 63 |                         kernel_size=8,
 64 |                         dilatations=[1, 2, 4, 8],
 65 |                         nb_stacks=8,
 66 |                         max_len=timesteps,
 67 |                         activation='norm_relu')
 68 | ```
 69 | 
 70 | ### - Classification (Many to one) e.g. sequential mnist task
 71 | 
 72 | ```
 73 | model = tcn.dilated_tcn(output_slice_index='last',
 74 |                         num_feat=input_dim,
 75 |                         num_classes=10,
 76 |                         nb_filters=64,
 77 |                         kernel_size=8,
 78 |                         dilatations=[1, 2, 4, 8],
 79 |                         nb_stacks=8,
 80 |                         max_len=timesteps,
 81 |                         activation='norm_relu')
 82 | ```
 83 | 
 84 | ## Installation
 85 | 
 86 | ```
 87 | git clone git@github.com:philipperemy/keras-tcn.git
 88 | cd keras-tcn
 89 | virtualenv -p python3.6 venv
 90 | source venv/bin/activate
 91 | pip install -r requirements.txt # change to tensorflow if you don't have a GPU.
 92 | python setup.py install # install keras-tcn as a package
 93 | ```
 94 | 
 95 | ## Run
 96 | 
 97 | Once `keras-tcn` is installed as a package, you can take a glimpse of what's possible to do with TCNs. Some task examples are available in the repository for this purpose:
 98 | 
 99 | ```
100 | cd adding_problem/
101 | python main.py # run adding problem task
102 | 
103 | cd copy_memory/
104 | python main.py # run copy memory task
105 | 
106 | cd mnist_pixel/
107 | python main.py # run sequential mnist pixel task
108 | ```
109 | 
110 | ## Tasks
111 | 
112 | ### Adding Task
113 | 
114 | The task consists of feeding a large array of decimal numbers to the network, along with a boolean array of the same length. The objective is to sum the two decimals at the positions where the boolean array contains its two 1s.
115 | 
116 | #### Explanation
117 | 
118 | <p align="center">
119 |   <img src="misc/Adding_Task.png">
120 |   <b>Adding Problem Task</b><br><br>
121 | </p>
122 | 
123 | #### Implementation results
124 | 
125 | The model takes time to learn this task. It's symbolized by a very long plateau (could take ~8 epochs on some runs).
126 | 
127 | ```
128 | 200000/200000 [==============================] - 451s 2ms/step - loss: 0.1749 - val_loss: 0.1662
129 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1681 - val_loss: 0.1676
130 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1677 - val_loss: 0.1663
131 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1676 - val_loss: 0.1652
132 | 200000/200000 [==============================] - 449s 2ms/step - loss: 0.1165 - val_loss: 0.0093
133 | 200000/200000 [==============================] - 448s 2ms/step - loss: 0.0083 - val_loss: 0.0033
134 | 200000/200000 [==============================] - 448s 2ms/step - loss: 0.0040 - val_loss: 0.0012
135 | ```
136 | 
137 | ### Copy Memory Task
138 | 
139 | The copy memory consists of a very large array:
140 | - At the beginning, there's the vector x of length N. This is the vector to copy.
141 | - At the end, N+1 9s are present. The first 9 is seen as a delimiter.
142 | - In the middle, only 0s are there.
143 | 
144 | The idea is to copy the content of the vector x to the end of the large array. The task is made sufficiently complex by increasing the number of 0s in the middle.
145 | 
146 | #### Explanation
147 | 
148 | <p align="center">
149 |   <img src="misc/Copy_Memory_Task.png">
150 |   <b>Copy Memory Task</b><br><br>
151 | </p>
152 | 
153 | #### Implementation results
154 | 
155 | ```
156 | 10000/10000 [==============================] - 20s 2ms/step - loss: 0.3474 - acc: 0.8985 - val_loss: 0.0362 - val_acc: 0.9859
157 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0360 - acc: 0.9859 - val_loss: 0.0353 - val_acc: 0.9859
158 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0351 - acc: 0.9859 - val_loss: 0.0345 - val_acc: 0.9859
159 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0342 - acc: 0.9860 - val_loss: 0.0336 - val_acc: 0.9860
160 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0332 - acc: 0.9865 - val_loss: 0.0307 - val_acc: 0.9883
161 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0240 - acc: 0.9898 - val_loss: 0.0157 - val_acc: 0.9933
162 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0136 - acc: 0.9951 - val_loss: 0.0094 - val_acc: 0.9976
163 | 10000/10000 [==============================] - 13s 1ms/step - loss: 0.0087 - acc: 0.9978 - val_loss: 0.0049 - val_acc: 1.0000
164 | 10000/10000 [==============================] - 14s 1ms/step - loss: 0.0050 - acc: 0.9992 - val_loss: 0.0020 - val_acc: 1.0000
165 | ```
166 | 
167 | ### Sequential MNIST
168 | 
169 | #### Explanation
170 | 
171 | The idea here is to consider MNIST images as 1-D sequences and feed them to the network. This task is particularly hard because each sequence has 28*28 = 784 elements. In order to classify correctly, the network has to remember the whole sequence. Standard LSTMs are unable to perform well on this task.
172 | 
173 | <p align="center">
174 |   <img src="misc/Sequential_MNIST_Task.png">
175 |   <b>Sequential MNIST</b><br><br>
176 | </p>
177 | 
178 | #### Implementation results
179 | 
180 | ```
181 | 60000/60000 [==============================] - 569s 9ms/step - loss: 0.2209 - acc: 0.9303 - val_loss: 0.0699 - val_acc: 0.9781
182 | 60000/60000 [==============================] - 545s 9ms/step - loss: 0.0784 - acc: 0.9760 - val_loss: 0.0507 - val_acc: 0.9843
183 | 60000/60000 [==============================] - 553s 9ms/step - loss: 0.0599 - acc: 0.9824 - val_loss: 0.0512 - val_acc: 0.9840
184 | 60000/60000 [==============================] - 555s 9ms/step - loss: 0.0493 - acc: 0.9851 - val_loss: 0.0569 - val_acc: 0.9824
185 | 60000/60000 [==============================] - 549s 9ms/step - loss: 0.0421 - acc: 0.9868 - val_loss: 0.0424 - val_acc: 0.9864
186 | 60000/60000 [==============================] - 558s 9ms/step - loss: 0.0358 - acc: 0.9886 - val_loss: 0.0416 - val_acc: 0.9874
187 | 60000/60000 [==============================] - 536s 9ms/step - loss: 0.0317 - acc: 0.9901 - val_loss: 0.0566 - val_acc: 0.9835
188 | 60000/60000 [==============================] - 483s 8ms/step - loss: 0.0272 - acc: 0.9915 - val_loss: 0.0565 - val_acc: 0.9845
189 | 60000/60000 [==============================] - 489s 8ms/step - loss: 0.0278 - acc: 0.9915 - val_loss: 0.0421 - val_acc: 0.9874
190 | 60000/60000 [==============================] - 483s 8ms/step - loss: 0.0227 - acc: 0.9929 - val_loss: 0.0464 - val_acc: 0.9882
191 | 60000/60000 [==============================] - 484s 8ms/step - loss: 0.0203 - acc: 0.9935 - val_loss: 0.0428 - val_acc: 0.9890
192 | 60000/60000 [==============================] - 484s 8ms/step - loss: 0.0212 - acc: 0.9934 - val_loss: 0.0539 - val_acc: 0.9884
193 | 60000/60000 [==============================] - 483s 8ms/step - loss: 0.0167 - acc: 0.9947 - val_loss: 0.0393 - val_acc: 0.9900
194 | ```
195 | 
196 | 
197 | 
198 | ## References
199 | - https://github.com/locuslab/TCN/ (TCN for Pytorch)
200 | - https://arxiv.org/pdf/1803.01271.pdf (An Empirical Evaluation of Generic Convolutional and Recurrent Networks
201 | for Sequence Modeling)
202 | - https://arxiv.org/pdf/1609.03499.pdf (Original Wavenet paper)
203 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/__init__.py


--------------------------------------------------------------------------------
/neural_net/keras_tcn/adding_problem/README.md:
--------------------------------------------------------------------------------
 1 | ## The Adding Problem
 2 | 
 3 | ### Overview
 4 | 
 5 | In this task, each input consists of a length-T sequence of depth 2, with all values randomly
 6 | chosen in [0, 1] in dimension 1. The second dimension consists of all zeros except for
 7 | two elements, which are marked by 1. The objective is to sum the two random values whose second
 8 | dimensions are marked by 1. One can think of this as computing the dot product of two dimensions.
 9 | 
10 | Simply predicting the sum to be 1 should give an MSE of about 0.1767.
11 | 
12 | ### Data Generation
13 | 
14 | See `data_generator` in `utils.py`.
15 | 
16 | ### Note
17 | 
18 | Because a TCN's receptive field depends on depth of the network and the filter size, we need
19 | to make sure the model we use can cover the sequence length T.
20 | 
21 | From: https://github.com/locuslab/TCN/


--------------------------------------------------------------------------------
/neural_net/keras_tcn/adding_problem/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/adding_problem/__init__.py


--------------------------------------------------------------------------------
/neural_net/keras_tcn/adding_problem/main.py:
--------------------------------------------------------------------------------
 1 | import keras
 2 | 
 3 | from tcn import tcn
 4 | from utils import data_generator
 5 | 
# Module-level datasets shared by PrintSomeValues and run_task(); they are
# generated once, when this module is imported.
x_train, y_train = data_generator(n=200000, seq_length=600)
x_test, y_test = data_generator(n=40000, seq_length=600)
 8 | 
 9 | 
class PrintSomeValues(keras.callbacks.Callback):
    """Keras callback that prints one test sample at the start of every epoch.

    It shows the first test input, its target and the model's current
    prediction for that input, using the module-level ``x_test``/``y_test``.
    """

    def on_epoch_begin(self, epoch, logs=None):
        """Print the first test sample, its target and the current prediction.

        :param epoch: index of the epoch that is about to start (unused)
        :param logs: metrics dict passed by Keras (unused); defaults to
            ``None`` instead of a shared mutable ``{}``
        """
        sample = x_test[0:1]
        print(f'x_test[0:1] = {sample}.')
        print(f'y_test[0:1] = {y_test[0:1]}.')
        print(f'pred = {self.model.predict(sample)}.')
16 | 
17 | 
def run_task():
    """Build the dilated TCN for the adding problem and train it.

    Uses the module-level ``x_train``/``y_train`` for training and
    ``x_test``/``y_test`` for validation. The model is configured with
    ``regression=True`` and takes its output at the last time step.
    """
    tcn_options = dict(
        output_slice_index='last',
        num_feat=x_train.shape[2],
        num_classes=0,
        nb_filters=24,
        kernel_size=8,
        dilatations=[1, 2, 4, 8],
        nb_stacks=8,
        max_len=x_train.shape[1],
        activation='norm_relu',
        use_skip_connections=False,
        return_param_str=True,
        regression=True,
    )
    # The parameter string is returned because return_param_str=True; it is
    # not needed here.
    model, _param_str = tcn.dilated_tcn(**tcn_options)

    for label, array in (('x_train', x_train), ('y_train', y_train)):
        print(f'{label}.shape = {array.shape}')

    epoch_printer = PrintSomeValues()

    # Reference left by the original author (note on sparse softmax):
    # http://chappers.github.io/web%20micro%20log/2017/01/26/quick-models-in-keras/
    model.summary()

    model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=500,
        callbacks=[epoch_printer],
        batch_size=128,
    )
44 | 
# Start training only when this file is executed as a script.
if __name__ == '__main__':
    run_task()
47 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/adding_problem/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def data_generator(n, seq_length):
 5 |     """
 6 |     Args:
 7 |         seq_length: Length of the adding problem data
 8 |         n: # of data in the set
 9 |     """
10 |     x_num = np.random.uniform(0, 1, (n, 1, seq_length))
11 |     x_mask = np.zeros([n, 1, seq_length])
12 |     y = np.zeros([n, 1])
13 |     for i in range(n):
14 |         positions = np.random.choice(seq_length, size=2, replace=False)
15 |         x_mask[i, 0, positions[0]] = 1
16 |         x_mask[i, 0, positions[1]] = 1
17 |         y[i, 0] = x_num[i, 0, positions[0]] + x_num[i, 0, positions[1]]
18 |     x = np.concatenate((x_num, x_mask), axis=1)
19 |     x = np.transpose(x, (0, 2, 1))
20 |     return x, y
21 | 
22 | 
# When run as a script, print a small generated sample for visual inspection.
if __name__ == '__main__':
    print(data_generator(n=20, seq_length=10))
25 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/README.md:
--------------------------------------------------------------------------------
 1 | ## Copying Memory Task
 2 | 
 3 | ### Overview
 4 | 
 5 | In this task, each input sequence has length T+20. The first 10 values are chosen randomly
 6 | among the digits 1-8, with the rest being all zeros, except for the last 11 entries that are
 7 | filled with the digit ‘9’ (the first ‘9’ is a delimiter). The goal is to generate an output
 8 | of same length that is zero everywhere, except the last 10 values after the delimiter, where
 9 | the model is expected to repeat the 10 values it encountered at the start of the input.
10 | 
11 | ### Data Generation
12 | 
13 | See `data_generator` in `utils.py`.
14 | 
15 | ### Note
16 | 
17 | - Because a TCN's receptive field depends on depth of the network and the filter size, we need
18 | to make sure the model we use can cover the sequence length T+20.
19 | 
20 | - Using the `--seq_len` flag, one can change the # of values to recall (the typical setup is 10).
21 | 
22 | From: https://github.com/locuslab/TCN/


--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/copy_memory/__init__.py


--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/main.py:
--------------------------------------------------------------------------------
 1 | import keras
 2 | 
 3 | from utils import data_generator
 4 | from tcn import tcn
 5 | 
# Module-level datasets shared by PrintSomeValues and run_task(); they are
# generated once, when this module is imported.
# NOTE(review): presumably the positional arguments are (t, mem_length, b_size)
# as declared by data_generator in the sibling utils.py -- confirm.
x_train, y_train = data_generator(601, 10, 10000)
x_test, y_test = data_generator(601, 10, 2000)
 8 | 
 9 | 
class PrintSomeValues(keras.callbacks.Callback):
    """Keras callback that prints one test sample at the start of every epoch.

    It shows the first test input, its target, and the shape and per-step
    argmax of the model's current prediction, using the module-level
    ``x_test``/``y_test``.
    """

    def on_epoch_begin(self, epoch, logs=None):
        """Print the first test sample, its target and the current prediction.

        :param epoch: index of the epoch that is about to start (unused)
        :param logs: metrics dict passed by Keras (unused); defaults to
            ``None`` instead of a shared mutable ``{}``
        """
        sample = x_test[0:1]
        print(f'x_test[0:1] = {sample.flatten()}.')
        print(f'y_test[0:1] = {y_test[0:1].flatten()}.')
        # Run the forward pass once and reuse it for both printed lines.
        prediction = self.model.predict(sample)
        print(f'p.shape = {prediction.shape}.')
        print(f'p(x_test[0:1]) = {prediction.argmax(axis=2).flatten()}.')
17 | 
18 | 
def run_task():
    """Build the dilated TCN for the copy memory task and train it.

    Uses the module-level ``x_train``/``y_train`` for training and
    ``x_test``/``y_test`` for validation. The first training sequence and its
    target are printed as flat lists before the model is built.
    """
    # Flatten through numpy; sum(list_of_lists, []) re-copies the growing
    # list on every step and is quadratic in the sequence length.
    print(x_train[0].flatten().tolist())
    print(y_train[0].flatten().tolist())

    # The parameter string is returned because return_param_str=True; it is
    # not needed here.
    model, _ = tcn.dilated_tcn(num_feat=1,
                               num_classes=10,
                               nb_filters=10,
                               kernel_size=8,
                               dilatations=[1, 2, 4, 8],
                               nb_stacks=8,
                               max_len=x_train.shape[1],
                               activation='norm_relu',
                               use_skip_connections=False,
                               return_param_str=True)

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')

    psv = PrintSomeValues()

    # Using sparse softmax.
    # http://chappers.github.io/web%20micro%20log/2017/01/26/quick-models-in-keras/
    model.summary()

    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100,
              callbacks=[psv], batch_size=128)
45 | 
46 | 
# Start training only when this file is executed as a script.
if __name__ == '__main__':
    run_task()
49 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/copy_memory/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def data_generator(t, mem_length, b_size):
 5 |     """
 6 |     Generate data for the copying memory task
 7 |     :param t: The total blank time length
 8 |     :param mem_length: The length of the memory to be recalled
 9 |     :param b_size: The batch size
10 |     :return: Input and target data tensor
11 |     """
12 |     seq = np.array(np.random.randint(1, 9, size=(b_size, mem_length)), dtype=float)
13 |     zeros = np.zeros((b_size, t))
14 |     marker = 9 * np.ones((b_size, mem_length + 1))
15 |     placeholders = np.zeros((b_size, mem_length))
16 | 
17 |     x = np.array(np.concatenate((seq, zeros[:, :-1], marker), 1), dtype=int)
18 |     y = np.array(np.concatenate((placeholders, zeros, seq), 1), dtype=int)
19 |     return np.expand_dims(x, axis=2), np.expand_dims(y, axis=2)
20 | 
21 | 
# When run as a script, print one generated input sequence as a flat array.
if __name__ == '__main__':
    print(data_generator(t=601, mem_length=10, b_size=1)[0].flatten())
24 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/misc/Adding_Task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/misc/Adding_Task.png


--------------------------------------------------------------------------------
/neural_net/keras_tcn/misc/Copy_Memory_Task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/misc/Copy_Memory_Task.png


--------------------------------------------------------------------------------
/neural_net/keras_tcn/misc/Dilated_Conv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/misc/Dilated_Conv.png


--------------------------------------------------------------------------------
/neural_net/keras_tcn/misc/Sequential_MNIST_Task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/misc/Sequential_MNIST_Task.png


--------------------------------------------------------------------------------
/neural_net/keras_tcn/mnist_pixel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/keras_tcn/mnist_pixel/__init__.py


--------------------------------------------------------------------------------
/neural_net/keras_tcn/mnist_pixel/main.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | 
 3 | from utils import data_generator
 4 | from tcn import tcn
 5 | 
 6 | 
 7 | def get_activations(model, model_inputs, print_shape_only=False, layer_name=None):
 8 |     print('----- activations -----')
 9 |     activations = []
10 |     inp = model.input
11 | 
12 |     model_multi_inputs_cond = True
13 |     if not isinstance(inp, list):
14 |         # only one input! let's wrap it in a list.
15 |         inp = [inp]
16 |         model_multi_inputs_cond = False
17 | 
18 |     outputs = [layer.output for layer in model.layers if
19 |                layer.name == layer_name or layer_name is None]  # all layer outputs
20 | 
21 |     funcs = [K.function(inp + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
22 | 
23 |     if model_multi_inputs_cond:
24 |         list_inputs = []
25 |         list_inputs.extend(model_inputs)
26 |         list_inputs.append(0.)
27 |     else:
28 |         list_inputs = [model_inputs, 0.]
29 | 
30 |     # Learning phase. 0 = Test mode (no dropout or batch normalization)
31 |     # layer_outputs = [func([model_inputs, 0.])[0] for func in funcs]
32 |     layer_outputs = [func(list_inputs)[0] for func in funcs]
33 |     for layer_activations in layer_outputs:
34 |         activations.append(layer_activations)
35 |         if print_shape_only:
36 |             print(layer_activations.shape)
37 |         else:
38 |             print(layer_activations)
39 |     return activations
40 |     # np.sum(activations[15].squeeze(), axis=1)
41 | 
42 | 
def run_task():
    """
    Train a dilated TCN classifier on the pixel-sequence MNIST data.

    The data come from `data_generator()`; the one-hot labels are converted to
    integer class indices because the model is compiled with a sparse
    categorical cross-entropy loss.
    """
    (x_train, y_train), (x_test, y_test) = data_generator()

    # `dropout` has no default in `tcn.dilated_tcn`, so it must be passed explicitly.
    # NOTE(review): 0.05 is a value picked for this script -- tune as needed.
    model, param_str = tcn.dilated_tcn(output_slice_index='last',  # try 'first'.
                                       num_feat=1,
                                       num_classes=10,
                                       nb_filters=64,
                                       kernel_size=8,
                                       dilatations=[1, 2, 4, 8],
                                       nb_stacks=8,
                                       max_len=x_train[0:1].shape[1],
                                       dropout=0.05,
                                       activation='norm_relu',
                                       use_skip_connections=False,
                                       return_param_str=True)

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
    print(f'x_test.shape = {x_test.shape}')
    print(f'y_test.shape = {y_test.shape}')

    model.summary()

    # Drop the trailing singleton axis of the one-hot labels and take the class index.
    train_labels = y_train.squeeze().argmax(axis=1)
    test_labels = y_test.squeeze().argmax(axis=1)

    model.fit(x_train, train_labels, epochs=100,
              validation_data=(x_test, test_labels))
71 | 
72 | 
# Script entry point: run the training task only when the file is executed directly.
if __name__ == '__main__':
    run_task()
75 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/mnist_pixel/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from keras.datasets import mnist
 3 | from keras.utils import to_categorical
 4 | 
 5 | 
 6 | def data_generator():
 7 |     # input image dimensions
 8 |     img_rows, img_cols = 28, 28
 9 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
10 |     x_train = x_train.reshape(-1, img_rows * img_cols, 1)
11 |     x_test = x_test.reshape(-1, img_rows * img_cols, 1)
12 | 
13 |     num_classes = 10
14 |     y_train = to_categorical(y_train, num_classes)
15 |     y_test = to_categorical(y_test, num_classes)
16 | 
17 |     y_train = np.expand_dims(y_train, axis=2)
18 |     y_test = np.expand_dims(y_test, axis=2)
19 | 
20 |     x_train = x_train.astype('float32')
21 |     x_test = x_test.astype('float32')
22 |     x_train /= 255
23 |     x_test /= 255
24 | 
25 |     return (x_train, y_train), (x_test, y_test)
26 | 
27 | 
# When run directly, load the data once and print the resulting arrays.
if __name__ == '__main__':
    print(data_generator())
30 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name='keras-tcn',
 5 |     version='1.4.0',
 6 |     description='Keras TCN',
 7 |     author='Philippe Remy',
 8 |     license='MIT',
 9 |     packages=['tcn'],
10 |     install_requires=['tensorflow-gpu', 'numpy']
11 | )
12 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/tcn/__init__.py:
--------------------------------------------------------------------------------
1 | from neural_net.keras_tcn.tcn import tcn
2 | 


--------------------------------------------------------------------------------
/neural_net/keras_tcn/tcn/tcn.py:
--------------------------------------------------------------------------------
  1 | import keras.backend as K
  2 | from keras import optimizers
  3 | from keras.layers import Conv1D, SpatialDropout1D
  4 | from keras.layers import Activation, Lambda
  5 | from keras.layers import Convolution1D, Dense
  6 | from keras.models import Input, Model
  7 | import keras.layers
  8 | 
  9 | 
 10 | def channel_normalization(x):
 11 |     # Normalize by the highest activation
 12 |     max_values = K.max(K.abs(x), 2, keepdims=True) + 1e-5
 13 |     out = x / max_values
 14 |     return out
 15 | 
 16 | 
 17 | def wave_net_activation(x):
 18 |     tanh_out = Activation('tanh')(x)
 19 |     sigm_out = Activation('sigmoid')(x)
 20 |     return keras.layers.multiply([tanh_out, sigm_out])
 21 | 
 22 | 
 23 | def residual_block(x, s, i, activation, nb_filters, kernel_size, dropout):
 24 |     original_x = x
 25 |     conv = Conv1D(filters=nb_filters, kernel_size=kernel_size,
 26 |                   dilation_rate=2 ** i, padding='causal',
 27 |                   name='dilated_conv_%d_tanh_s%d' % (2 ** i, s))(x)
 28 |     if activation == 'norm_relu':
 29 |         x = Activation('relu')(conv)
 30 |         x = Lambda(channel_normalization)(x)
 31 |     elif activation == 'wavenet':
 32 |         x = wave_net_activation(conv)
 33 |     else:
 34 |         x = Activation(activation)(conv)
 35 | 
 36 |     x = SpatialDropout1D(dropout)(x)
 37 | 
 38 |     # 1x1 conv.
 39 |     x = Convolution1D(nb_filters, 1, padding='same')(x)
 40 |     res_x = keras.layers.add([original_x, x])
 41 |     return res_x, x
 42 | 
 43 | 
 44 | def dilated_tcn(num_feat, num_classes, nb_filters,
 45 |                 kernel_size, dilatations, nb_stacks, max_len, dropout,
 46 |                 activation='wavenet', use_skip_connections=True,
 47 |                 return_param_str=False, output_slice_index=None,
 48 |                 regression=False):
 49 |     """
 50 |     dilation_depth : number of layers per stack
 51 |     nb_stacks : number of stacks.
 52 |     """
 53 |     input_layer = Input(name='input_layer', shape=(max_len, num_feat))
 54 |     x = input_layer
 55 |     x = Convolution1D(nb_filters, kernel_size, padding='causal', name='initial_conv')(x)
 56 | 
 57 |     skip_connections = []
 58 |     for s in range(nb_stacks):
 59 |         for i in dilatations:
 60 |             x, skip_out = residual_block(x, s, i, activation, nb_filters, kernel_size, dropout)
 61 |             skip_connections.append(skip_out)
 62 | 
 63 |     if use_skip_connections:
 64 |         x = keras.layers.add(skip_connections)
 65 |     x = Activation('relu')(x)
 66 | 
 67 |     if output_slice_index is not None:  # can test with 0 or -1.
 68 |         if output_slice_index == 'last':
 69 |             output_slice_index = -1
 70 |         if output_slice_index == 'first':
 71 |             output_slice_index = 0
 72 |         x = Lambda(lambda tt: tt[:, output_slice_index, :])(x)
 73 | 
 74 |     print('x.shape=', x.shape)
 75 | 
 76 |     if not regression:
 77 |         if num_classes == 2:
 78 |             x = Dense(1)(x)
 79 |             x = Activation('sigmoid', name='output_sigmoid')(x)
 80 |             output_layer = x
 81 |             print(f'model.x = {input_layer.shape}')
 82 |             print(f'model.y = {output_layer.shape}')
 83 |             model = Model(input_layer, output_layer)
 84 |             adam = optimizers.Adam(lr=0.002, clipnorm=1.)
 85 |             model.compile(adam, loss='binary_crossentropy', metrics=['accuracy'])
 86 |             print('Adam with norm clipping.')
 87 |         elif num_classes > 2:
 88 |             # classification
 89 |             x = Dense(num_classes)(x)
 90 |             x = Activation('softmax', name='output_softmax')(x)
 91 |             output_layer = x
 92 |             print(f'model.x = {input_layer.shape}')
 93 |             print(f'model.y = {output_layer.shape}')
 94 |             model = Model(input_layer, output_layer)
 95 | 
 96 |             adam = optimizers.Adam(lr=0.002, clipnorm=1.)
 97 |             model.compile(adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
 98 |             print('Adam with norm clipping.')
 99 |         else:
100 |             raise ValueError
101 |     else:
102 |         # regression
103 |         x = Dense(1)(x)
104 |         x = Activation('linear', name='output_dense')(x)
105 |         output_layer = x
106 |         print(f'model.x = {input_layer.shape}')
107 |         print(f'model.y = {output_layer.shape}')
108 |         model = Model(input_layer, output_layer)
109 |         adam = optimizers.Adam(lr=0.002, clipnorm=1.)
110 |         model.compile(adam, loss='mean_squared_error')
111 | 
112 |     if return_param_str:
113 |         param_str = 'D-TCN_C{}_B{}_L{}'.format(2, nb_stacks, dilatations)
114 |         return model, param_str
115 |     else:
116 |         return model
117 | 


--------------------------------------------------------------------------------
/neural_net/model/segmentation/jan_joint0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/model/segmentation/jan_joint0.h5


--------------------------------------------------------------------------------
/neural_net/normal_pronunciation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions related to normal pronunciation manipulation
 3 | """
 4 | import os
 5 | import json
 6 | from neural_net.file_path import path_root
 7 | from neural_net.file_path import recordings_train
 8 | from neural_net.file_path import parse_recordings
 9 | from neural_net.utils.textgrid_preprocessing import parse_syllable_line_list
10 | 
11 | 
# Script: walk the training recordings, collect the syllable labels that have a
# special-pronunciation (class "1") or jianzi (class "2") annotation, and write
# the two de-duplicated label lists to JSON files under ./data/.
if __name__ == "__main__":

    list_normal_special = []  # the normal counterpart of the special pronunciation
    list_normal_jianzi = []  # the normal counterpart of jianzi

    for rec in recordings_train:
        # parse_recordings returns a 12-field description of the recording;
        # only some of the fields are used below.
        data_path, sub_folder, textgrid_folder, \
        wav_folder, filename, line_tier, longsyllable_tier, syllable_tier, \
        phoneme_tier, special_tier, special_class_tier, roletype = parse_recordings(rec)

        textgrid_filename = os.path.join(path_root, data_path, textgrid_folder, sub_folder, filename + ".textgrid")

        print("Parse textgrid file {}".format(textgrid_filename))

        # The same textgrid is parsed once per child tier, always with the
        # long-syllable tier as parent. The is_* flags are not checked here.
        nested_syllable_list, is_file_exist, is_syllable_found = \
            parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
                                     parent_tier=longsyllable_tier,
                                     child_tier=syllable_tier)

        nested_special_list, is_file_exist, is_special_found = \
            parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
                                     parent_tier=longsyllable_tier,
                                     child_tier=special_tier)

        nested_specialClass_list, is_file_exist, is_specialClass_found = \
            parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
                                     parent_tier=longsyllable_tier,
                                     child_tier=special_class_tier)

        # NOTE(review): nested_phoneme_list is not used anywhere below.
        nested_phoneme_list, is_file_exist, is_phoneme_found = \
            parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
                                     parent_tier=longsyllable_tier,
                                     child_tier=phoneme_tier)

        for ii_line in range(len(nested_special_list)):
            line_special_list = nested_special_list[ii_line]
            # Lines whose parent entry carries the label "1" are skipped.
            # NOTE(review): the meaning of that label is not visible here -- confirm.
            if line_special_list[0][2] != "1":
                line_syllable_list = nested_syllable_list[ii_line]
                line_specialClass_list = nested_specialClass_list[ii_line]

                # Element [1] of a line holds its child entries; index 2 of an
                # entry is read as its label.
                for ii_syl in range(len(line_specialClass_list[1])):
                    special_class = line_specialClass_list[1][ii_syl][2]
                    try:
                        syllable = line_syllable_list[1][ii_syl][2]
                    except IndexError:
                        # Re-raise with the recording and line index so the
                        # misaligned annotation can be located.
                        raise IndexError(rec, ii_line)

                    if special_class == "1":  # shangkou
                        # NOTE(review): `shangkou` is assigned but never read.
                        shangkou = line_special_list[1][ii_syl][2]
                        list_normal_special.append(syllable)
                        # print("shangkou", syllable, shangkou, rec, ii_line)
                    if special_class == "2":  # jiantuan
                        # NOTE(review): `jiantuan` is assigned but never read.
                        jiantuan = line_special_list[1][ii_syl][2]
                        list_normal_jianzi.append(syllable)

    # Remove duplicates; the resulting order follows set iteration order.
    list_normal_special = list(set(list_normal_special))
    list_normal_jianzi = list(set(list_normal_jianzi))

    # Output paths are relative to the current working directory.
    with open("./data/normal_special.json", "w") as f:
        json.dump(list_normal_special, f)

    with open("./data/normal_jianzi.json", "w") as f:
        json.dump(list_normal_jianzi, f)


--------------------------------------------------------------------------------
/neural_net/onsetSegmentEval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ronggong/mispronunciation-detection/bed6f39e7e90a76a87332db425e14363b477ccb4/neural_net/onsetSegmentEval/__init__.py


--------------------------------------------------------------------------------
/neural_net/onsetSegmentEval/evaluation.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | Syllable segmentation evaluation: landmark and boundary evaluations
 5 | Only evaluate boundary onset
 6 | 
 7 | [1] A new hybrid approach for automatic speech signal segmentation
 8 | using silence signal detection, energy convex hull, and spectral variation
 9 | 
10 | [2] Syll-O-Matic: An adaptive time-frequency representation
11 | for the automatic segmentation of speech into syllables
12 | 
13 | [3] EVALUATION FRAMEWORK FOR AUTOMATIC SINGING
14 | TRANSCRIPTION
15 | """
16 | 
17 | from neural_net.onsetSegmentEval.phonemeMap import misMatchIgnorePhn
18 | from neural_net.onsetSegmentEval.phonemeMap import misMatchIgnoreSyl
19 | from neural_net.parameters import hopsize_t
20 | import numpy as np
21 | 
22 | 
def onsetEval(groundtruthOnsets, detectedOnsets, tolerance, label):
    """
    Count detected onsets that lie close to a ground-truth onset.

    A detected onset is correct when at least one ground-truth onset is
    strictly closer than `tolerance`; with `label` set, the labels of the
    pair must also be equal. Several detected onsets may match the same
    ground-truth onset.

    :param groundtruthOnsets: [[onset time, onset label], ...]
    :param detectedOnsets: [[onset time, onset label], ...]
    :param tolerance: 0.025 or 0.05
    :param label: True or False, if we want to evaluate the label
    :return: number of detected onsets, number of ground-truth onsets,
             number of correct onsets, insertions, deletions
    """
    num_detected = len(detectedOnsets)
    num_groundtruth = len(groundtruthOnsets)

    hit_flags = [0] * num_detected
    for idx, detected in enumerate(detectedOnsets):
        for truth in groundtruthOnsets:
            if abs(detected[0] - truth[0]) >= tolerance:
                continue
            if not label or detected[1] == truth[1]:
                hit_flags[idx] = 1

    num_correct = sum(hit_flags)
    insertions = num_detected - num_correct
    deletions = num_groundtruth - num_correct

    return num_detected, num_groundtruth, num_correct, insertions, deletions
54 | 
55 | 
def metrics(numDetected, numGroundtruth, numCorrect):
    """
    Compute recall, precision and F1 in percent.

    :param numDetected: number of detected items (precision denominator)
    :param numGroundtruth: number of ground-truth items (recall denominator)
    :param numCorrect: number of correctly detected items
    :return: recall, precision, F1; a ratio whose denominator is 0 is reported
             as 0.0 instead of raising ZeroDivisionError
    """
    recall = (numCorrect/float(numGroundtruth))*100 if numGroundtruth else 0.0
    precision = (numCorrect/float(numDetected))*100 if numDetected else 0.0

    # the harmonic mean is undefined when both terms are zero
    if precision == 0 and recall == 0:
        F1 = 0
    else:
        F1 = 2*(precision*recall)/(precision+recall)

    return recall, precision, F1
65 | 
66 | 
def segmentEval(gt_resample, detected_resample):
    """
    Count the frames on which the detected label agrees with the ground truth.

    Two labels agree when they are equal or when the [ground truth, detected]
    pair is listed in misMatchIgnorePhn or misMatchIgnoreSyl.

    :param gt_resample: ground-truth label per frame
    :param detected_resample: detected label per frame, indexed like `gt_resample`
    :return: number of agreeing frames, total number of ground-truth frames
    """
    def frames_agree(truth, detected):
        if truth == detected:
            return True
        pair = [truth, detected]
        return pair in misMatchIgnorePhn or pair in misMatchIgnoreSyl

    num_frames = len(gt_resample)
    num_correct = sum(1 for ii in range(num_frames)
                      if frames_agree(gt_resample[ii], detected_resample[ii]))

    return num_correct, num_frames
77 | 
78 | 
def segment_eval_helper(onsets, line_time):
    """
    Expand a list of labelled onsets into one label per frame.

    :param onsets: [[onset time, onset label], ...]
    :param line_time: duration of the line, in the same unit as the onset times
    :return: list of labels, one per hop of `hopsize_t`; each frame carries the
             label of the most recent onset, starting with the first onset's label
    """
    # frame index of every onset
    onset_frames = np.round(np.array([onset[0] for onset in onsets]) / hopsize_t)

    active_label = onsets[0][1]
    frame_labels = [active_label]

    num_frames = int(round(line_time / hopsize_t))
    for frame in range(1, num_frames):
        matches = np.where(onset_frames == frame)[0]
        if len(matches):
            # when several onsets fall on this frame, the first one wins
            active_label = onsets[matches[0]][1]
        frame_labels.append(active_label)

    return frame_labels
95 | 


--------------------------------------------------------------------------------
/neural_net/parameters.py:
--------------------------------------------------------------------------------
 1 | fs = 44100
 2 | framesize_t = 0.025  # in second
 3 | hopsize_t = 0.010
 4 | 
 5 | framesize = int(round(framesize_t * fs))
 6 | hopsize = int(round(hopsize_t * fs))
 7 | 
 8 | highFrequencyBound = fs/2 if fs/2 < 11000 else 11000
 9 | 
10 | varin = {}
11 | # parameters of viterbi
12 | varin['delta_mode'] = 'proportion'
13 | varin['delta'] = 0.35
14 | 
15 | 
def config_select(config):
    """
    Map a (number of LSTM layers, number of dense layers) pair to a model name.

    :param config: indexable whose first two items are the LSTM and dense layer counts
    :return: the model name, e.g. 'two_lstm_single_dense' for [2, 1]
    :raises ValueError: if the pair is not one of the supported configurations
    """
    model_names = {
        1: {0: 'single_lstm',
            1: 'single_lstm_single_dense'},
        2: {0: 'two_lstm',
            1: 'two_lstm_single_dense',
            2: 'two_lstm_two_dense'},
        3: {0: 'three_lstm',
            1: 'three_lstm_single_dense',
            2: 'three_lstm_two_dense',
            3: 'three_lstm_three_dense'},
    }

    # the dense count is only looked at once the LSTM count is a known one
    dense_variants = model_names.get(config[0])
    if dense_variants is not None:
        model_name = dense_variants.get(config[1])
        if model_name is not None:
            return model_name

    raise ValueError('unsupported model configuration: {!r}'.format(config))


--------------------------------------------------------------------------------
/neural_net/plot_code.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.style
 3 | import matplotlib as mpl
 4 | mpl.style.use('classic')
 5 | import matplotlib.pyplot as plt
 6 | 
 7 | fontsize = 15
 8 | 
 9 | 
def plot_spectro_att(mfcc0,
                     att_vector,
                     hopsize_t,
                     filename_save):
    """
    Save a two-panel figure: a spectrogram on top and the attention vector below.

    :param mfcc0: 2-D array indexed (frame, band); plotted transposed so time runs along x
    :param att_vector: one attention value per frame
    :param hopsize_t: hop size used to convert frame indices to the time axis
    :param filename_save: path of the image file to write
    """
    fig = plt.figure(figsize=(16, 6))
    try:
        ax1 = plt.subplot(2, 1, 1)
        # take the number of bands from the data instead of assuming a fixed count
        y = np.arange(0, mfcc0.shape[1])
        x = np.arange(0, mfcc0.shape[0]) * hopsize_t
        plt.pcolormesh(x, y, np.transpose(mfcc0))

        ax1.set_ylabel('Syllable\nlog-mel spectro', fontsize=fontsize)
        ax1.axis('tight')

        ax2 = plt.subplot(2, 1, 2)
        x = np.arange(0, len(att_vector)) * hopsize_t
        plt.plot(x, att_vector)

        ax2.set_ylabel('Attention\nvector', fontsize=fontsize)
        ax2.axis('tight')
        plt.xlabel('time (s)')

        plt.savefig(filename_save, bbox_inches='tight')
    finally:
        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)


--------------------------------------------------------------------------------
/neural_net/training_sample_collection_syllable.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pickle
  3 | import json
  4 | from neural_net.parameters import *
  5 | from neural_net.file_path import *
  6 | from neural_net.utils.audio_preprocessing import get_log_mel_madmom
  7 | from neural_net.file_path import parse_recordings
  8 | from neural_net.utils.textgrid_preprocessing import parse_syllable_line_list
  9 | 
 10 | 
 11 | def dump_feature_syllable(recordings, list_normal_special, list_normal_jianzi):
 12 | 
 13 |     # feature dictionary
 14 |     dic_syllable_special = {}
 15 |     dic_syllable_jianzi = {}
 16 |     dic_syllable_special_normal = {}
 17 |     dic_syllable_jianzi_normal = {}
 18 | 
 19 |     for rec in recordings:
 20 |         data_path, sub_folder, textgrid_folder, \
 21 |         wav_folder, filename, line_tier, longsyllable_tier, syllable_tier, \
 22 |         phoneme_tier, special_tier, special_class_tier, roletype = parse_recordings(rec)
 23 | 
 24 |         wav_filename = os.path.join(path_root, data_path, wav_folder, sub_folder, filename + ".wav")
 25 |         textgrid_filename = os.path.join(path_root, data_path, textgrid_folder, sub_folder, filename + ".textgrid")
 26 | 
 27 |         print("Parse textgrid file {}".format(textgrid_filename))
 28 | 
 29 |         nested_syllable_list, is_file_exist, is_syllable_found = \
 30 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 31 |                                      parent_tier=longsyllable_tier,
 32 |                                      child_tier=syllable_tier)
 33 | 
 34 |         nested_special_list, is_file_exist, is_special_found = \
 35 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 36 |                                      parent_tier=longsyllable_tier,
 37 |                                      child_tier=special_tier)
 38 | 
 39 |         nested_specialClass_list, is_file_exist, is_specialClass_found = \
 40 |             parse_syllable_line_list(ground_truth_text_grid_file=textgrid_filename,
 41 |                                      parent_tier=longsyllable_tier,
 42 |                                      child_tier=special_class_tier)
 43 | 
 44 |         log_mel = get_log_mel_madmom(audio_fn=wav_filename,
 45 |                                      fs=fs,
 46 |                                      hopsize_t=hopsize_t,
 47 |                                      channel=1,
 48 |                                      context=False)
 49 | 
 50 |         for ii_line in range(len(nested_special_list)):
 51 |             line_special_list = nested_special_list[ii_line]
 52 |             if line_special_list[0][2] != "1":
 53 |                 line_syllable_list = nested_syllable_list[ii_line]
 54 |                 line_specialClass_list = nested_specialClass_list[ii_line]
 55 | 
 56 |                 for ii_syl in range(len(line_specialClass_list[1])):
 57 |                     special_class = line_specialClass_list[1][ii_syl][2]
 58 |                     try:
 59 |                         syllable = line_syllable_list[1][ii_syl][2]
 60 |                     except IndexError:
 61 |                         raise IndexError(rec, ii_line)
 62 | 
 63 |                     if special_class == "1" or special_class == "2":
 64 |                         label_special = line_special_list[1][ii_syl][2]
 65 |                         onset = line_special_list[1][ii_syl][0]
 66 |                         offset = line_special_list[1][ii_syl][1]
 67 |                         sf = int(round(onset * fs / float(hopsize)))  # starting frame
 68 |                         ef = int(round(offset * fs / float(hopsize)))  # ending frame
 69 |                         log_mel_syllable = log_mel[sf:ef, :]
 70 | 
 71 |                         if len(log_mel_syllable):
 72 |                             if special_class == "1":  # shangkou
 73 |                                 num_special += 1
 74 |                                 if label_special in dic_syllable_special:
 75 |                                     dic_syllable_special[label_special].append(log_mel_syllable)
 76 |                                 else:
 77 |                                     dic_syllable_special[label_special] = [log_mel_syllable]
 78 |                             if special_class == "2":  # jiantuan
 79 |                                 num_jianzi += 1
 80 |                                 if label_special in dic_syllable_jianzi:
 81 |                                     dic_syllable_jianzi[label_special].append(log_mel_syllable)
 82 |                                 else:
 83 |                                     dic_syllable_jianzi[label_special] = [log_mel_syllable]
 84 | 
 85 |                     elif not special_class.isdigit():
 86 |                         onset = line_syllable_list[1][ii_syl][0]
 87 |                         offset = line_syllable_list[1][ii_syl][1]
 88 |                         sf = int(round(onset * fs / float(hopsize)))  # starting frame
 89 |                         ef = int(round(offset * fs / float(hopsize)))  # ending frame
 90 |                         log_mel_syllable = log_mel[sf:ef, :]
 91 | 
 92 |                         if len(log_mel_syllable):
 93 |                             if syllable in list_normal_special:
 94 |                                 if syllable in dic_syllable_special_normal:
 95 |                                     dic_syllable_special_normal[syllable].append(log_mel_syllable)
 96 |                                 else:
 97 |                                     dic_syllable_special_normal[syllable] = [log_mel_syllable]
 98 |                             if syllable in list_normal_jianzi:
 99 |                                 if syllable in dic_syllable_jianzi_normal:
100 |                                     dic_syllable_jianzi_normal[syllable].append(log_mel_syllable)
101 |                                 else:
102 |                                     dic_syllable_jianzi_normal[syllable] = [log_mel_syllable]
103 |                     else:
104 |                         pass
105 | 
106 |     return dic_syllable_special, dic_syllable_jianzi, dic_syllable_special_normal, dic_syllable_jianzi_normal
107 | 
108 | 
# Script: load the two normal-counterpart syllable lists, collect the syllable
# features of the training recordings and pickle the four resulting dictionaries.
# The file-name variables are not defined in this file; presumably they come
# from the star imports at the top -- verify.
if __name__ == "__main__":

    # syllable label lists stored as JSON
    with open(filename_normal_special, "r") as f:
        list_normal_special = json.load(f)
    with open(filename_normal_jianzi, "r") as f:
        list_normal_jianzi = json.load(f)

    dic_syllable_special, dic_syllable_jianzi, dic_syllable_special_normal, dic_syllable_jianzi_normal = \
        dump_feature_syllable(recordings=recordings_train,
                              list_normal_special=list_normal_special,
                              list_normal_jianzi=list_normal_jianzi)

    # positive = special / jianzi syllables, negative = their normal counterparts
    with open(dict_special_positive, "wb") as f:
        pickle.dump(dic_syllable_special, f)

    with open(dict_special_negative, "wb") as f:
        pickle.dump(dic_syllable_special_normal, f)

    with open(dict_jianzi_positive, "wb") as f:
        pickle.dump(dic_syllable_jianzi, f)

    with open(dict_jianzi_negative, "wb") as f:
        pickle.dump(dic_syllable_jianzi_normal, f)
132 | 


--------------------------------------------------------------------------------
/neural_net/training_scripts/attention.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from keras import backend as K, initializers, regularizers, constraints
  3 | from keras.engine.topology import Layer
  4 | 
  5 | 
  6 | def dot_product(x, kernel):
  7 |     """
  8 |     Wrapper for dot product operation, in order to be compatible with both
  9 |     Theano and Tensorflow
 10 |     Args:
 11 |         x (): input
 12 |         kernel (): weights
 13 |     Returns:
 14 |     """
 15 |     if K.backend() == 'tensorflow':
 16 |         # todo: check that this is correct
 17 |         kernel = K.expand_dims(kernel)
 18 |         return K.squeeze(K.dot(x, kernel), axis=-1)
 19 |     else:
 20 |         return K.dot(x, kernel)
 21 | 
 22 | 
 23 | class Attention(Layer):
 24 |     def __init__(self,
 25 |                  W_regularizer=None, b_regularizer=None,
 26 |                  W_constraint=None, b_constraint=None,
 27 |                  bias=True,
 28 |                  return_attention=False,
 29 |                  **kwargs):
 30 |         """
 31 |         Keras Layer that implements an Attention mechanism for temporal data.
 32 |         Supports Masking.
 33 |         Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]
 34 |         # Input shape
 35 |             3D tensor with shape: `(samples, steps, features)`.
 36 |         # Output shape
 37 |             2D tensor with shape: `(samples, features)`.
 38 |         :param kwargs:
 39 |         Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
 40 |         The dimensions are inferred based on the output shape of the RNN.
 41 |         Note: The layer has been tested with Keras 1.x
 42 |         Example:
 43 |             # 1
 44 |             model.add(LSTM(64, return_sequences=True))
 45 |             model.add(Attention())
 46 |             # next add a Dense layer (for classification/regression) or whatever...
 47 |             # 2 - Get the attention scores
 48 |             hidden = LSTM(64, return_sequences=True)(words)
 49 |             sentence, word_scores = Attention(return_attention=True)(hidden)
 50 |         """
 51 |         self.supports_masking = True
 52 |         self.return_attention = return_attention
 53 |         self.init = initializers.get('glorot_uniform')
 54 | 
 55 |         self.W_regularizer = regularizers.get(W_regularizer)
 56 |         self.b_regularizer = regularizers.get(b_regularizer)
 57 | 
 58 |         self.W_constraint = constraints.get(W_constraint)
 59 |         self.b_constraint = constraints.get(b_constraint)
 60 | 
 61 |         self.bias = bias
 62 |         super(Attention, self).__init__(**kwargs)
 63 | 
 64 |     def build(self, input_shape):
 65 |         assert len(input_shape) == 3
 66 | 
 67 |         self.W = self.add_weight((input_shape[-1],),
 68 |                                  initializer=self.init,
 69 |                                  name='{}_W'.format(self.name),
 70 |                                  regularizer=self.W_regularizer,
 71 |                                  constraint=self.W_constraint)
 72 |         if self.bias:
 73 |             self.b = self.add_weight((1,),
 74 |                                      initializer='zero',
 75 |                                      name='{}_b'.format(self.name),
 76 |                                      regularizer=self.b_regularizer,
 77 |                                      constraint=self.b_constraint)
 78 |         else:
 79 |             self.b = None
 80 | 
 81 |         self.built = True
 82 | 
 83 |     def compute_mask(self, input, input_mask=None):
 84 |         # do not pass the mask to the next layers
 85 |         return None
 86 | 
 87 |     def call(self, x, mask=None):
 88 |         eij = dot_product(x, self.W)  # (samples, steps)
 89 | 
 90 |         if self.bias:
 91 |             eij += self.b
 92 | 
 93 |         eij = K.tanh(eij)
 94 | 
 95 |         a = K.exp(eij)
 96 | 
 97 |         # apply mask after the exp. will be re-normalized next
 98 |         if mask is not None:
 99 |             # Cast the mask to floatX to avoid float64 upcasting in theano
100 |             a *= K.cast(mask, K.floatx())
101 | 
102 |         # in some cases especially in the early stages of training the sum may be almost zero
103 |         # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
104 |         # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
105 |         a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
106 | 
107 |         a_expand = K.expand_dims(a)
108 | 
109 |         # element wise
110 |         weighted_input = x * a_expand
111 | 
112 |         result = K.sum(weighted_input, axis=1)
113 | 
114 |         if self.return_attention:
115 |             return [result, a]
116 |         return result
117 | 
118 |     def compute_output_shape(self, input_shape):
119 |         if self.return_attention:
120 |             return [(input_shape[0], input_shape[-1]),
121 |                     (input_shape[0], input_shape[1])]
122 |         else:
123 |             return input_shape[0], input_shape[-1]


--------------------------------------------------------------------------------
/neural_net/training_scripts/generator.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def generator_batch1(list_feature, labels, scaler, shuffle=True):
 5 |     """data generator"""
 6 |     ii = 0
 7 |     while True:
 8 |         if scaler:
 9 |             fea = scaler.transform(list_feature[ii])
10 |         else:
11 |             fea = list_feature[ii]
12 | 
13 |         fea = np.expand_dims(fea, axis=0)
14 |         lab = np.expand_dims(labels[ii], axis=0)
15 | 
16 |         yield fea, lab
17 | 
18 |         ii += 1
19 | 
20 |         if ii >= len(list_feature):
21 |             ii = 0
22 |             if shuffle:
23 |                 p = np.random.permutation(len(list_feature))
24 |                 list_feature = [list_feature[ii_p] for ii_p in p]
25 |                 labels = labels[p]  # labels is a numpy array


--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_jianzi.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import pickle
  4 | import numpy as np
  5 | from sklearn.model_selection import StratifiedKFold
  6 | from sklearn.model_selection import train_test_split
  7 | from sklearn.preprocessing import StandardScaler
  8 | 
  9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 | 
 11 | from neural_net.training_scripts.models_RNN import train_RNN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 | 
 16 | 
 17 | if __name__ == '__main__':
 18 | 
 19 |     cv_prod = "prod"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = "feedforward"
 24 |     conv = True
 25 |     dropout = 0.5
 26 |     epoch = 500
 27 | 
 28 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 29 | 
 30 |     with open(dict_jianzi_positive, "rb") as f:
 31 |         feature_jianzi_pos = pickle.load(f)
 32 | 
 33 |     with open(dict_jianzi_negative, "rb") as f:
 34 |         feature_jianzi_neg = pickle.load(f)
 35 | 
 36 |     X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos,
 37 |                                                dict_negative=feature_jianzi_neg)
 38 | 
 39 |     if cv_prod == "cv":
 40 |         list_loss = []
 41 |         list_acc = []
 42 |         skf = StratifiedKFold(n_splits=5)
 43 |         for ii, (train_index, val_index) in enumerate(skf.split(X_jianzi, y_jianzi)):
 44 | 
 45 |             model_name = 'jianzi_model_{}_{}_{}'.format(attention, conv, dropout)
 46 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 47 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 48 | 
 49 |             print("TRAIN:", train_index, "TEST:", val_index)
 50 | 
 51 |             X_train, X_test = [X_jianzi[ii] for ii in train_index], [X_jianzi[ii] for ii in val_index]
 52 |             y_train, y_test = y_jianzi[train_index], y_jianzi[val_index]
 53 | 
 54 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 55 | 
 56 |             # standarization
 57 |             scaler = StandardScaler()
 58 |             X_train_conc = np.concatenate(X_train)
 59 |             scaler.fit(X_train_conc)
 60 | 
 61 |             model = train_RNN_batch(list_feature_fold_train=X_train,
 62 |                                     labels_fold_train=y_train,
 63 |                                     list_feature_fold_val=X_val,
 64 |                                     labels_fold_val=y_val,
 65 |                                     batch_size=batch_size,
 66 |                                     input_shape=input_shape,
 67 |                                     output_shape=1,
 68 |                                     file_path_model=file_path_model,
 69 |                                     filename_log=file_path_log,
 70 |                                     epoch=epoch,
 71 |                                     patience=patience,
 72 |                                     scaler=scaler,
 73 |                                     attention=attention,
 74 |                                     conv=conv,
 75 |                                     dropout=dropout,
 76 |                                     summ=True,
 77 |                                     verbose=2)
 78 | 
 79 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 80 |                                        labels_test=y_test,
 81 |                                        file_path_model=file_path_model,
 82 |                                        attention=attention,
 83 |                                        scaler=scaler)
 84 | 
 85 |             list_loss.append(loss_test)
 86 | 
 87 |         with open(os.path.join(path_model, 'log', 'jianzi_esults_{}_{}_{}.txt'.format(attention, conv, dropout)), 'w') as f:
 88 |             f.write("attention {} conv {} dropout {} loss {}".format(attention, conv, dropout, np.mean(list_loss)))
 89 | 
 90 |     elif cv_prod == "prod":
 91 |         X_train, X_val, y_train, y_val = train_test_split(X_jianzi, y_jianzi, stratify=y_jianzi, test_size=0.1)
 92 | 
 93 |         model_name = 'jianzi_model_prod_{}_{}_{}'.format(attention, conv, dropout)
 94 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 95 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 96 | 
 97 |         # standarization
 98 |         scaler = StandardScaler()
 99 |         X_train_conc = np.concatenate(X_train)
100 |         scaler.fit(X_train_conc)
101 | 
102 |         train_RNN_batch(list_feature_fold_train=X_train,
103 |                         labels_fold_train=y_train,
104 |                         list_feature_fold_val=X_val,
105 |                         labels_fold_val=y_val,
106 |                         batch_size=batch_size,
107 |                         input_shape=input_shape,
108 |                         output_shape=1,
109 |                         file_path_model=file_path_model,
110 |                         filename_log=file_path_log,
111 |                         epoch=epoch,
112 |                         patience=patience,
113 |                         scaler=scaler,
114 |                         attention=attention,
115 |                         conv=conv,
116 |                         dropout=dropout,
117 |                         summ=True,
118 |                         verbose=2)
119 |     else:
120 |         raise ValueError("{} is not a valid option.".format(cv_prod))
121 | 


--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_jianzi_tcn.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import pickle
  4 | import numpy as np
  5 | from sklearn.model_selection import StratifiedKFold
  6 | from sklearn.model_selection import train_test_split
  7 | from sklearn.preprocessing import StandardScaler
  8 | 
  9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 | 
 11 | from neural_net.training_scripts.models_TCN import train_TCN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 | 
 16 | 
 17 | if __name__ == '__main__':
 18 | 
 19 |     cv_prod = "cv"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = False
 24 |     dropout = 0.05
 25 |     epoch = 500
 26 | 
 27 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 28 | 
 29 |     with open(dict_jianzi_positive, "rb") as f:
 30 |         feature_jianzi_pos = pickle.load(f)
 31 | 
 32 |     with open(dict_jianzi_negative, "rb") as f:
 33 |         feature_jianzi_neg = pickle.load(f)
 34 | 
 35 |     X_jianzi, y_jianzi = combine_feature_label(dict_positive=feature_jianzi_pos,
 36 |                                                dict_negative=feature_jianzi_neg)
 37 | 
 38 |     if cv_prod == "cv":
 39 |         list_loss = []
 40 |         list_acc = []
 41 |         skf = StratifiedKFold(n_splits=5)
 42 |         for ii, (train_index, val_index) in enumerate(skf.split(X_jianzi, y_jianzi)):
 43 | 
 44 |             model_name = 'jianzi_model_tcn_1_stack_3_{}'.format(dropout)
 45 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 46 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 47 | 
 48 |             print("TRAIN:", train_index, "TEST:", val_index)
 49 | 
 50 |             X_train, X_test = [X_jianzi[ii] for ii in train_index], [X_jianzi[ii] for ii in val_index]
 51 |             y_train, y_test = y_jianzi[train_index], y_jianzi[val_index]
 52 | 
 53 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 54 | 
 55 |             # standarization
 56 |             scaler = StandardScaler()
 57 |             X_train_conc = np.concatenate(X_train)
 58 |             scaler.fit(X_train_conc)
 59 | 
 60 |             model = train_TCN_batch(list_feature_fold_train=X_train,
 61 |                                     labels_fold_train=y_train,
 62 |                                     list_feature_fold_val=X_val,
 63 |                                     labels_fold_val=y_val,
 64 |                                     batch_size=batch_size,
 65 |                                     input_shape=input_shape,
 66 |                                     file_path_model=file_path_model,
 67 |                                     filename_log=file_path_log,
 68 |                                     epoch=epoch,
 69 |                                     patience=patience,
 70 |                                     scaler=scaler,
 71 |                                     dropout=dropout,
 72 |                                     summ=True,
 73 |                                     verbose=2)
 74 | 
 75 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 76 |                                        labels_test=y_test,
 77 |                                        file_path_model=file_path_model,
 78 |                                        attention=attention,
 79 |                                        scaler=scaler)
 80 | 
 81 |             list_loss.append(loss_test)
 82 | 
 83 |         with open(os.path.join(path_model, 'log', 'jianzi_esults_tcn_1_stack_3_{}.txt'.format(dropout)), 'w') as f:
 84 |             f.write("loss {}".format(np.mean(list_loss)))
 85 | 
 86 |     elif cv_prod == "prod":
 87 |         X_train, X_val, y_train, y_val = train_test_split(X_jianzi, y_jianzi, stratify=y_jianzi, test_size=0.1)
 88 | 
 89 |         model_name = 'jianzi_model_prod_tcn_{}'.format(dropout)
 90 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 91 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 92 | 
 93 |         # standarization
 94 |         scaler = StandardScaler()
 95 |         X_train_conc = np.concatenate(X_train)
 96 |         scaler.fit(X_train_conc)
 97 | 
 98 |         train_TCN_batch(list_feature_fold_train=X_train,
 99 |                         labels_fold_train=y_train,
100 |                         list_feature_fold_val=X_val,
101 |                         labels_fold_val=y_val,
102 |                         batch_size=batch_size,
103 |                         input_shape=input_shape,
104 |                         file_path_model=file_path_model,
105 |                         filename_log=file_path_log,
106 |                         epoch=epoch,
107 |                         patience=patience,
108 |                         scaler=scaler,
109 |                         dropout=dropout,
110 |                         summ=True,
111 |                         verbose=2)
112 |     else:
113 |         raise ValueError("{} is not a valid option.".format(cv_prod))
114 | 


--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_special.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import pickle
  4 | import numpy as np
  5 | from sklearn.model_selection import StratifiedKFold
  6 | from sklearn.model_selection import train_test_split
  7 | from sklearn.preprocessing import StandardScaler
  8 | 
  9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 | 
 11 | from neural_net.training_scripts.models_RNN import train_RNN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 | 
 16 | 
 17 | if __name__ == '__main__':
 18 | 
 19 |     cv_prod = "cv"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = "selfatt"
 24 |     conv = True
 25 |     dropout = 0.5
 26 |     epoch = 500
 27 | 
 28 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 29 | 
 30 |     with open(dict_special_positive, "rb") as f:
 31 |         feature_special_pos = pickle.load(f)
 32 | 
 33 |     with open(dict_special_negative, "rb") as f:
 34 |         feature_special_neg = pickle.load(f)
 35 | 
 36 |     X_special, y_special = combine_feature_label(dict_positive=feature_special_pos,
 37 |                                                  dict_negative=feature_special_neg)
 38 | 
 39 |     if cv_prod == "cv":
 40 |         list_loss = []
 41 |         list_acc = []
 42 |         skf = StratifiedKFold(n_splits=5)
 43 |         for ii, (train_index, val_index) in enumerate(skf.split(X_special, y_special)):
 44 | 
 45 |             model_name = 'special_model_{}_{}_{}'.format(attention, conv, dropout)
 46 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 47 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 48 | 
 49 |             print("TRAIN:", train_index, "TEST:", val_index)
 50 | 
 51 |             X_train, X_test = [X_special[ii] for ii in train_index], [X_special[ii] for ii in val_index]
 52 |             y_train, y_test = y_special[train_index], y_special[val_index]
 53 | 
 54 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 55 | 
 56 |             # standarization
 57 |             scaler = StandardScaler()
 58 |             X_train_conc = np.concatenate(X_train)
 59 |             scaler.fit(X_train_conc)
 60 | 
 61 |             model = train_RNN_batch(list_feature_fold_train=X_train,
 62 |                                     labels_fold_train=y_train,
 63 |                                     list_feature_fold_val=X_val,
 64 |                                     labels_fold_val=y_val,
 65 |                                     batch_size=batch_size,
 66 |                                     input_shape=input_shape,
 67 |                                     output_shape=1,
 68 |                                     file_path_model=file_path_model,
 69 |                                     filename_log=file_path_log,
 70 |                                     epoch=epoch,
 71 |                                     patience=patience,
 72 |                                     scaler=scaler,
 73 |                                     attention=attention,
 74 |                                     conv=conv,
 75 |                                     dropout=dropout,
 76 |                                     summ=True,
 77 |                                     verbose=2)
 78 | 
 79 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 80 |                                        labels_test=y_test,
 81 |                                        file_path_model=file_path_model,
 82 |                                        attention=attention,
 83 |                                        scaler=scaler)
 84 | 
 85 |             list_loss.append(loss_test)
 86 | 
 87 |         with open(os.path.join(path_model, 'log', 'special_results_{}_{}_{}.txt'.format(attention, conv, dropout)), 'w') as f:
 88 |             f.write("attention {} conv {} dropout {} loss {}".format(attention, conv, dropout, np.mean(list_loss)))
 89 | 
 90 |     elif cv_prod == "prod":
 91 |         X_train, X_val, y_train, y_val = train_test_split(X_special, y_special, stratify=y_special, test_size=0.1)
 92 | 
 93 |         model_name = 'special_model_prod_{}_{}_{}'.format(attention, conv, dropout)
 94 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 95 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 96 | 
 97 |         # standarization
 98 |         scaler = StandardScaler()
 99 |         X_train_conc = np.concatenate(X_train)
100 |         scaler.fit(X_train_conc)
101 | 
102 |         train_RNN_batch(list_feature_fold_train=X_train,
103 |                         labels_fold_train=y_train,
104 |                         list_feature_fold_val=X_val,
105 |                         labels_fold_val=y_val,
106 |                         batch_size=batch_size,
107 |                         input_shape=input_shape,
108 |                         output_shape=1,
109 |                         file_path_model=file_path_model,
110 |                         filename_log=file_path_log,
111 |                         epoch=epoch,
112 |                         patience=patience,
113 |                         scaler=scaler,
114 |                         attention=attention,
115 |                         conv=conv,
116 |                         dropout=dropout,
117 |                         summ=True,
118 |                         verbose=2)
119 |     else:
120 |         raise ValueError("{} is not a valid option.".format(cv_prod))
121 | 


--------------------------------------------------------------------------------
/neural_net/training_scripts/hpc_code/train_run_special_tcn.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import pickle
  4 | import numpy as np
  5 | from sklearn.model_selection import StratifiedKFold
  6 | from sklearn.model_selection import train_test_split
  7 | from sklearn.preprocessing import StandardScaler
  8 | 
  9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 10 | 
 11 | from neural_net.training_scripts.models_TCN import train_TCN_batch
 12 | from neural_net.training_scripts.models_RNN import eval_RNN_model
 13 | from neural_net.combine_feature_label import combine_feature_label
 14 | from neural_net.file_path import *
 15 | 
 16 | 
 17 | if __name__ == '__main__':
 18 | 
 19 |     cv_prod = "cv"
 20 |     batch_size = 1
 21 |     input_shape = (batch_size, None, 80)
 22 |     patience = 15
 23 |     attention = False
 24 |     dropout = 0.05
 25 |     epoch = 500
 26 | 
 27 |     path_model = '/Users/ronggong/PycharmProjects/mispronunciation-detection/neural_net/model/'
 28 | 
 29 |     with open(dict_special_positive, "rb") as f:
 30 |         feature_special_pos = pickle.load(f)
 31 | 
 32 |     with open(dict_special_negative, "rb") as f:
 33 |         feature_special_neg = pickle.load(f)
 34 | 
 35 |     X_special, y_special = combine_feature_label(dict_positive=feature_special_pos,
 36 |                                                  dict_negative=feature_special_neg)
 37 | 
 38 |     if cv_prod == "cv":
 39 |         list_loss = []
 40 |         list_acc = []
 41 |         skf = StratifiedKFold(n_splits=5)
 42 |         for ii, (train_index, val_index) in enumerate(skf.split(X_special, y_special)):
 43 | 
 44 |             model_name = 'special_model_tcn_1_stack_3_{}'.format(dropout)
 45 |             file_path_model = os.path.join(path_model, model_name + '_' + str(ii) + '.h5')
 46 |             file_path_log = os.path.join(path_model, 'log', model_name + '_' + str(ii) + '.csv')
 47 | 
 48 |             print("TRAIN:", train_index, "TEST:", val_index)
 49 | 
 50 |             X_train, X_test = [X_special[ii] for ii in train_index], [X_special[ii] for ii in val_index]
 51 |             y_train, y_test = y_special[train_index], y_special[val_index]
 52 | 
 53 |             X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.1)
 54 | 
 55 |             # standarization
 56 |             scaler = StandardScaler()
 57 |             X_train_conc = np.concatenate(X_train)
 58 |             scaler.fit(X_train_conc)
 59 | 
 60 |             model = train_TCN_batch(list_feature_fold_train=X_train,
 61 |                                     labels_fold_train=y_train,
 62 |                                     list_feature_fold_val=X_val,
 63 |                                     labels_fold_val=y_val,
 64 |                                     batch_size=batch_size,
 65 |                                     input_shape=input_shape,
 66 |                                     file_path_model=file_path_model,
 67 |                                     filename_log=file_path_log,
 68 |                                     epoch=epoch,
 69 |                                     patience=patience,
 70 |                                     scaler=scaler,
 71 |                                     dropout=dropout,
 72 |                                     summ=True,
 73 |                                     verbose=2)
 74 | 
 75 |             loss_test = eval_RNN_model(list_feature_test=X_test,
 76 |                                        labels_test=y_test,
 77 |                                        file_path_model=file_path_model,
 78 |                                        attention=attention,
 79 |                                        scaler=scaler)
 80 | 
 81 |             list_loss.append(loss_test)
 82 | 
 83 |         with open(os.path.join(path_model, 'log', 'special_results_tcn_1_stack_3_{}.txt'.format(dropout)), 'w') as f:
 84 |             f.write("loss {}".format(np.mean(list_loss)))
 85 | 
 86 |     elif cv_prod == "prod":
 87 |         X_train, X_val, y_train, y_val = train_test_split(X_special, y_special, stratify=y_special, test_size=0.1)
 88 | 
 89 |         model_name = 'special_model_prod_tcn_{}'.format(dropout)
 90 |         file_path_model = os.path.join(path_model, model_name + '.h5')
 91 |         file_path_log = os.path.join(path_model, 'log', model_name + '.csv')
 92 | 
 93 |         # standarization
 94 |         scaler = StandardScaler()
 95 |         X_train_conc = np.concatenate(X_train)
 96 |         scaler.fit(X_train_conc)
 97 | 
 98 |         train_TCN_batch(list_feature_fold_train=X_train,
 99 |                         labels_fold_train=y_train,
100 |                         list_feature_fold_val=X_val,
101 |                         labels_fold_val=y_val,
102 |                         batch_size=batch_size,
103 |                         input_shape=input_shape,
104 |                         file_path_model=file_path_model,
105 |                         filename_log=file_path_log,
106 |                         epoch=epoch,
107 |                         patience=patience,
108 |                         scaler=scaler,
109 |                         dropout=dropout,
110 |                         summ=True,
111 |                         verbose=2)
112 |     else:
113 |         raise ValueError("{} is not a valid option.".format(cv_prod))
114 | 


--------------------------------------------------------------------------------
/neural_net/training_scripts/models_RNN.py:
--------------------------------------------------------------------------------
  1 | from keras.models import Input
  2 | from keras.models import Model
  3 | from keras.models import load_model
  4 | from keras.layers import Dropout
  5 | from keras.layers import LSTM
  6 | from keras.layers import CuDNNLSTM
  7 | from keras.layers import Bidirectional
  8 | from keras.layers import Dense
  9 | from keras.layers import Conv1D
 10 | from keras.layers import Conv2D
 11 | from keras.layers import Dot
 12 | from keras.layers import Lambda
 13 | from keras.layers import MaxPooling2D
 14 | from keras.layers import Reshape
 15 | from keras import backend as K
 16 | from keras.callbacks import EarlyStopping
 17 | from keras.callbacks import CSVLogger
 18 | from keras.callbacks import ModelCheckpoint
 19 | from keras.activations import softmax
 20 | from tensorflow.python.client import device_lib
 21 | 
 22 | import os
 23 | import sys
 24 | import numpy as np
 25 | from sklearn.metrics import log_loss
 26 | from sklearn.metrics import accuracy_score
 27 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))
 28 | from neural_net.training_scripts.attention import Attention
 29 | from neural_net.training_scripts.generator import generator_batch1
 30 | 
 31 | 
 32 | def conv_module(conv, input_shape, input):
 33 |     if conv:
 34 |         x = Reshape((-1, input_shape[2]) + (1,))(input)
 35 |         x = Conv2D(filters=8, kernel_size=(1, 3), activation="relu")(x)
 36 |         x = MaxPooling2D(pool_size=(1, 3))(x)
 37 | 
 38 |         x = Conv2D(filters=16, kernel_size=(1, 3), activation="relu")(x)
 39 |         x = MaxPooling2D(pool_size=(1, 3))(x)
 40 |         shape = K.int_shape(x)
 41 |         x = Reshape((-1, shape[2] * shape[3]))(x)
 42 |     else:
 43 |         x = input
 44 |     return x
 45 | 
 46 | 
 47 | def embedding_RNN_1_lstm(input_shape, conv=False, dropout=False, att=False):
 48 | 
 49 |     device = device_lib.list_local_devices()[0].device_type
 50 | 
 51 |     input = Input(batch_shape=input_shape)
 52 | 
 53 |     x = conv_module(conv, input_shape, input)
 54 | 
 55 |     if att:
 56 |         return_sequence = True
 57 |     else:
 58 |         return_sequence = False
 59 | 
 60 |     if device == 'CPU':
 61 |         if dropout:
 62 |             x = Bidirectional(LSTM(units=8, return_sequences=return_sequence, dropout=dropout))(x)
 63 |             x = Dropout(dropout)(x)
 64 |         else:
 65 |             x = Bidirectional(LSTM(units=8, return_sequences=return_sequence))(x)
 66 |     else:
 67 |         x = Bidirectional(CuDNNLSTM(units=8, return_sequences=return_sequence))(x)
 68 | 
 69 |     if att == "feedforward":
 70 |         print(K.shape(x))
 71 |         x, attention = Attention(return_attention=True)(x)
 72 |     elif att == "selfatt":
 73 |         attention = Conv1D(filters=16, kernel_size=1, activation='tanh', padding='same', use_bias=True,
 74 |                            kernel_initializer='glorot_uniform', bias_initializer='zeros',
 75 |                            name="attention_layer1")(x)
 76 |         attention = Conv1D(filters=16, kernel_size=1, activation='linear', padding='same',
 77 |                            use_bias=True,
 78 |                            kernel_initializer='glorot_uniform', bias_initializer='zeros',
 79 |                            name="attention_layer2")(attention)
 80 |         attention = Lambda(lambda x: softmax(x, axis=1), name="attention_vector")(attention)
 81 | 
 82 |         # Apply attention weights
 83 |         weighted_sequence_embedding = Dot(axes=[1, 1], normalize=False, name="weighted_sequence_embedding")(
 84 |             [attention, x])
 85 | 
 86 |         # Add and normalize to obtain final sequence embedding
 87 |         x = Lambda(lambda x: K.l2_normalize(K.sum(x, axis=1)))(weighted_sequence_embedding)
 88 |         attention = weighted_sequence_embedding
 89 |     else:
 90 |         attention = None
 91 | 
 92 |     return x, input, attention
 93 | 
 94 | 
 95 | def RNN_model_definition(input_shape,
 96 |                          conv,
 97 |                          dropout,
 98 |                          attention,
 99 |                          output_shape):
100 |     x, input, att_vector = embedding_RNN_1_lstm(input_shape=input_shape,
101 |                                                 conv=conv,
102 |                                                 dropout=dropout,
103 |                                                 att=attention)
104 | 
105 |     # print("attention shape {}".format(K.shape(att_vector)))
106 | 
107 |     outputs = [Dense(output_shape, activation='sigmoid')(x), att_vector]
108 | 
109 |     model = Model(inputs=input, outputs=outputs)
110 | 
111 |     # model.compile(optimizer='adam',
112 |     #               loss='binary_crossentropy',
113 |     #               metrics=['accuracy'])
114 | 
115 |     return model, input, att_vector
116 | 
117 | 
def train_RNN_batch(list_feature_fold_train,
                    labels_fold_train,
                    list_feature_fold_val,
                    labels_fold_val,
                    batch_size,
                    input_shape,
                    output_shape,
                    file_path_model,
                    filename_log,
                    epoch,
                    patience,
                    scaler,
                    attention,
                    conv,
                    dropout,
                    summ=False,
                    verbose=2):
    """Build, compile and train the RNN model with early stopping.

    :param list_feature_fold_train: training features, one entry per sample.
    :param labels_fold_train: training labels.
    :param list_feature_fold_val: validation features, one entry per sample.
    :param labels_fold_val: validation labels.
    :param batch_size: used to derive the number of steps per epoch.
    :param input_shape: full batch shape for the model input.
    :param output_shape: number of units of the final sigmoid ``Dense`` layer.
    :param file_path_model: where ``ModelCheckpoint`` saves the best model
        (lowest ``val_loss``).
    :param filename_log: CSV training log path (``;`` separated).
    :param epoch: maximum number of epochs.
    :param patience: early-stopping patience on ``val_loss``.
    :param scaler: passed to ``generator_batch1`` for feature scaling.
    :param attention: attention type forwarded to ``embedding_RNN_1_lstm``.
    :param conv: forwarded to ``embedding_RNN_1_lstm``.
    :param dropout: forwarded to ``embedding_RNN_1_lstm``.
    :param summ: print the model summary when truthy.
    :param verbose: verbosity passed to ``fit_generator``.
    :return: the in-memory model after training (it is not reloaded from the
        checkpoint file).
    """
    x, input, att_vector = embedding_RNN_1_lstm(input_shape=input_shape,
                                                conv=conv,
                                                dropout=dropout,
                                                att=attention)

    outputs = Dense(output_shape, activation='sigmoid')(x)

    model = Model(inputs=input, outputs=outputs)

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    if summ:
        model.summary()

    callbacks = [ModelCheckpoint(file_path_model, monitor='val_loss', verbose=0, save_best_only=True),
                 EarlyStopping(monitor='val_loss', patience=patience, verbose=0),
                 CSVLogger(filename=filename_log, separator=';')]

    print("start training with validation...")

    generator_train = generator_batch1(list_feature=list_feature_fold_train,
                                       labels=labels_fold_train,
                                       scaler=scaler)

    generator_val = generator_batch1(list_feature=list_feature_fold_val,
                                     labels=labels_fold_val,
                                     scaler=scaler)

    # fit_generator expects integer step counts; plain `/` yields a float
    # under Python 3, so convert explicitly.
    steps_train = int(np.ceil(len(list_feature_fold_train) / batch_size))
    steps_val = int(np.ceil(len(list_feature_fold_val) / batch_size))

    model.fit_generator(generator=generator_train,
                        steps_per_epoch=steps_train,
                        validation_data=generator_val,
                        validation_steps=steps_val,
                        callbacks=callbacks,
                        epochs=epoch,
                        verbose=verbose)

    return model
177 | 
178 | 
def eval_RNN_model(list_feature_test,
                   labels_test,
                   file_path_model,
                   attention,
                   scaler):
    """
    Load a saved model and compute its log loss on a test set.

    Each test feature is scaled with ``scaler``, given a leading batch axis
    and predicted on its own; element ``[0][0]`` of the prediction is kept.

    :param list_feature_test: sequence of per-sample feature arrays
    :param labels_test: ground-truth labels, one per sample
    :param file_path_model: path of the saved model
    :param attention: "feedforward" or "selfatt" select the custom objects
        needed to deserialize the model; any other value loads it plainly
    :param scaler: object exposing ``transform``
    :return: log loss between labels_test and the collected predictions
    """
    if attention == "feedforward":
        extra_objects = {'Attention': Attention(return_attention=True)}
    elif attention == "selfatt":
        extra_objects = {'softmax': softmax}
    else:
        extra_objects = None

    if extra_objects is None:
        model = load_model(file_path_model)
    else:
        model = load_model(filepath=file_path_model,
                           custom_objects=extra_objects)

    predictions = np.zeros((len(labels_test),))
    for idx, feature in enumerate(list_feature_test):
        single_batch = np.expand_dims(scaler.transform(feature), axis=0)
        predictions[idx] = model.predict_on_batch(single_batch)[0][0]

    return log_loss(y_true=labels_test, y_pred=predictions)


--------------------------------------------------------------------------------
/neural_net/training_scripts/models_TCN.py:
--------------------------------------------------------------------------------
 1 | from neural_net.keras_tcn.tcn import tcn
 2 | from neural_net.training_scripts.generator import generator_batch1
 3 | from keras.callbacks import EarlyStopping
 4 | from keras.callbacks import CSVLogger
 5 | from keras.callbacks import ModelCheckpoint
 6 | 
 7 | 
 8 | def train_TCN_batch(list_feature_fold_train,
 9 |                     labels_fold_train,
10 |                     list_feature_fold_val,
11 |                     labels_fold_val,
12 |                     batch_size,
13 |                     input_shape,
14 |                     file_path_model,
15 |                     filename_log,
16 |                     epoch,
17 |                     patience,
18 |                     scaler,
19 |                     dropout,
20 |                     summ=False,
21 |                     verbose=2):
22 | 
23 |     model, param_str = tcn.dilated_tcn(output_slice_index='last',  # try 'first'.
24 |                                        num_feat=input_shape[-1],
25 |                                        num_classes=2,
26 |                                        nb_filters=16,
27 |                                        kernel_size=3,
28 |                                        dilatations=[0, 1, 3, 5],
29 |                                        nb_stacks=1,
30 |                                        max_len=None,
31 |                                        dropout=dropout,
32 |                                        activation='norm_relu',
33 |                                        use_skip_connections=False,
34 |                                        return_param_str=True)
35 | 
36 |     if summ:
37 |         model.summary()
38 | 
39 |     callbacks = [ModelCheckpoint(file_path_model, monitor='val_loss', verbose=0, save_best_only=True),
40 |                  EarlyStopping(monitor='val_loss', patience=patience, verbose=0),
41 |                  CSVLogger(filename=filename_log, separator=';')]
42 | 
43 |     print("start training with validation...")
44 | 
45 |     generator_train = generator_batch1(list_feature=list_feature_fold_train,
46 |                                        labels=labels_fold_train,
47 |                                        scaler=scaler)
48 | 
49 |     generator_val = generator_batch1(list_feature=list_feature_fold_val,
50 |                                      labels=labels_fold_val,
51 |                                      scaler=scaler)
52 | 
53 |     model.fit_generator(generator=generator_train,
54 |                         steps_per_epoch=len(list_feature_fold_train)/batch_size,
55 |                         validation_data=generator_val,
56 |                         validation_steps=len(list_feature_fold_val)/batch_size,
57 |                         callbacks=callbacks,
58 |                         epochs=epoch,
59 |                         verbose=verbose)
60 | 
61 |     return model


--------------------------------------------------------------------------------
/neural_net/utils/audio_preprocessing.py:
--------------------------------------------------------------------------------
  1 | from madmom.processors import SequentialProcessor
  2 | import numpy as np
  3 | 
# Distance between 1.0 and the nearest adjacent float; used in this module as
# the additive offset handed to the logarithmic spectrogram processor.
EPSILON = np.spacing(1)
  5 | 
  6 | 
  7 | def Fprev_sub(x,w=2):
  8 |     """
  9 |     # D = prev_sub(X,W) calculate the shifted x, with shifting frames 2
 10 |     input feature*frame
 11 |     """
 12 |     # pad data by repeating first and last columns
 13 |     if w > 0:
 14 |         # shift to right
 15 |         xx = np.hstack((np.tile(x[:,0], (w,1)).transpose(), x[:,:-w]))
 16 |     if w < 0:
 17 |         # shift to left
 18 |         xx = np.hstack((x[:,-w:], np.tile(x[:,-1], (-w,1)).transpose()))
 19 |     if w==0:
 20 |         raise ValueError("shifting frame coef can't be 0.")
 21 | 
 22 |     # plt.figure()
 23 |     # plt.pcolormesh(xx)
 24 |     # plt.show()
 25 | 
 26 |     return xx
 27 | 
 28 | 
 29 | def _nbf_2D(log_mel, nlen):
 30 |     """shift the feature and concatenate it in both left and right sides for nlen"""
 31 | 
 32 |     log_mel = np.array(log_mel).transpose()
 33 |     log_mel_out = np.array(log_mel, copy=True)
 34 |     for ii in range(1, nlen + 1):
 35 |         log_mel_right_shift = Fprev_sub(log_mel, w=ii)
 36 |         log_mel_left_shift = Fprev_sub(log_mel, w=-ii)
 37 |         log_mel_out = np.vstack((log_mel_right_shift, log_mel_out, log_mel_left_shift))
 38 |     feature = log_mel_out.transpose()
 39 |     return feature
 40 | 
 41 | 
 42 | class MadmomMelbankProcessor(SequentialProcessor):
 43 | 
 44 |     def __init__(self, fs, hopsize_t):
 45 |         from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
 46 |         from madmom.audio.stft import ShortTimeFourierTransformProcessor
 47 |         from madmom.audio.filters import MelFilterbank
 48 |         from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
 49 |                                               LogarithmicSpectrogramProcessor)
 50 | 
 51 |         # define pre-processing chain
 52 |         sig = SignalProcessor(num_channels=1, sample_rate=fs)
 53 |         frames = FramedSignalProcessor(frame_size=2048, hopsize=int(fs*hopsize_t))
 54 |         stft = ShortTimeFourierTransformProcessor()  # caching FFT window
 55 |         filt = FilteredSpectrogramProcessor(
 56 |             filterbank=MelFilterbank, num_bands=80, fmin=27.5, fmax=16000,
 57 |             norm_filters=True, unique_filters=False)
 58 |         spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
 59 | 
 60 |         single = SequentialProcessor([frames, stft, filt, spec])
 61 | 
 62 |         pre_processor = SequentialProcessor([sig, single])
 63 | 
 64 |         super(MadmomMelbankProcessor, self).__init__([pre_processor])
 65 | 
 66 | 
 67 | def get_log_mel_madmom(audio_fn, fs, hopsize_t, channel, context=False):
 68 |     """
 69 |     calculate log mel feature by madmom
 70 |     :param audio_fn:
 71 |     :param fs:
 72 |     :param hopsize_t:
 73 |     :param channel:
 74 |     :return:
 75 |     """
 76 |     madmomMelbankProc = MadmomMelbankProcessor(fs, hopsize_t)
 77 |     mfcc = madmomMelbankProc(audio_fn)
 78 | 
 79 |     if context:
 80 |         if channel == 1:
 81 |             mfcc = _nbf_2D(mfcc, 7)
 82 |         else:
 83 |             mfcc_conc = []
 84 |             for ii in range(3):
 85 |                 mfcc_conc.append(_nbf_2D(mfcc[:,:,ii], 7))
 86 |             mfcc = np.stack(mfcc_conc, axis=2)
 87 | 
 88 |     return mfcc
 89 | 
 90 | 
 91 | def feature_reshape(feature, nlen=10):
 92 |     """
 93 |     reshape mfccBands feature into n_sample * n_row * n_col
 94 |     :param feature:
 95 |     :param nlen:
 96 |     :return:
 97 |     """
 98 | 
 99 |     n_sample = feature.shape[0]
100 |     n_row = 80
101 |     n_col = nlen*2+1
102 | 
103 |     feature_reshaped = np.zeros((n_sample,n_row,n_col),dtype='float32')
104 |     # print("reshaping feature...")
105 |     for ii in range(n_sample):
106 |         # print ii
107 |         feature_frame = np.zeros((n_row,n_col),dtype='float32')
108 |         for jj in range(n_col):
109 |             feature_frame[:,jj] = feature[ii][n_row*jj:n_row*(jj+1)]
110 |         feature_reshaped[ii,:,:] = feature_frame
111 |     return feature_reshaped
112 | 
113 | 
def segmentMfccLine(line, hopsize_t, mfccs):
    """
    Cut the frames belonging to one line out of a feature matrix.

    :param line: [start_time, end_time, lyrics]
    :param hopsize_t: hop size used to convert times to frame indices
    :param mfccs: feature matrix indexed as [:, frame]
    :return: the columns from the rounded start frame up to, but excluding,
        the rounded end frame
    """
    first_frame, last_frame = (int(round(line[k] / hopsize_t)) for k in (0, 1))
    return mfccs[:, first_frame:last_frame]


--------------------------------------------------------------------------------
/neural_net/utils/csv_preprocessing.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | 
 4 | def open_csv_recordings(filename):
 5 |     recordings = []
 6 |     with open(filename) as csvfile:
 7 |         readCSV = csv.reader(csvfile, delimiter=',')
 8 |         for row in readCSV:
 9 |             recordings.append(row)
10 |     return recordings
11 | 
12 | 
def write_csv_two_columns_list(two_columns_list, filename):
    """
    Write a list of rows to a comma-separated file.

    :param two_columns_list: iterable of rows, each an iterable of fields
    :param filename: path of the CSV file to create or overwrite
    """
    import sys

    # csv.writer emits text on Python 3, so a binary handle raises TypeError
    # there; newline='' leaves the writer's own line terminators untouched.
    # Python 2 keeps the binary mode its csv module expects.
    if sys.version_info[0] >= 3:
        csvfile = open(filename, 'w', newline='')
    else:
        csvfile = open(filename, 'wb')

    with csvfile:
        two_columns_writer = csv.writer(csvfile, delimiter=',')
        two_columns_writer.writerows(two_columns_list)
18 | 


--------------------------------------------------------------------------------
/neural_net/utils/textgridParser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import sys, os
  3 | 
# Append the 'utils' sub-directory located next to this module to the import
# search path.
currentPath = os.path.dirname(__file__)
utilsPath = os.path.join(currentPath, 'utils')
sys.path.append(utilsPath)
  7 | 
  8 | import neural_net.utils.textgrid as tgp
  9 | 
 10 | 
 11 | def textGrid2WordList(textgrid_file, whichTier = 'pinyin', utf16 = True):
 12 |     '''
 13 |     parse textGrid into a python list of tokens 
 14 |     @param whichTier : 'pinyin' default tier name  
 15 |     '''	
 16 |     if not os.path.isfile(textgrid_file): raise Exception("file {} not found".format(textgrid_file))
 17 |     beginTsAndWordList = []
 18 | 
 19 |     if utf16:
 20 |         par_obj = tgp.TextGrid.loadUTF16(textgrid_file)	#loading the object
 21 |     else:
 22 |         par_obj = tgp.TextGrid.load(textgrid_file)	#loading the object
 23 | 
 24 |     tiers = tgp.TextGrid._find_tiers(par_obj)	#finding existing tiers
 25 | 
 26 |     isTierFound = False
 27 |     for tier in tiers:
 28 |         tierName= tier.tier_name().replace('.', '')
 29 |         #iterating over tiers and selecting the one specified
 30 |         if tierName == whichTier:
 31 |             isTierFound = True
 32 |             #this function parse the file nicely and return cool tuples
 33 |             tier_details = tier.make_simple_transcript()
 34 | 
 35 |             for line in tier_details:
 36 |                 beginTsAndWordList.append([float(line[0]), float(line[1]), line[2]])
 37 | 
 38 |     if not isTierFound:
 39 |         print ('Missing tier {1} in file {0}' .format(textgrid_file, whichTier))
 40 | 
 41 |     return beginTsAndWordList, isTierFound
 42 | 
 43 | 
 44 | def line2WordList(line, entireWordList):
 45 |     '''
 46 |     find the nested wordList of entireWordList by line tuple
 47 |     :param line: line tuple [startTime, endTime, string]
 48 |     :param entireWordList: entire word list
 49 |     :return: nested wordList
 50 |     '''
 51 |     nestedWordList = []
 52 |     vault = False
 53 |     for wordlist in entireWordList:
 54 |          # the ending of the line
 55 |         if wordlist[1] == line[1]:
 56 |             nestedWordList.append(wordlist)
 57 |             break
 58 |         # the beginning of the line
 59 |         if wordlist[0] == line[0]:
 60 |             vault = True
 61 |         if vault == True:
 62 |             nestedWordList.append(wordlist)
 63 | 
 64 |     return nestedWordList
 65 | 
 66 | def wordListsParseByLines(entireLine, entireWordList):
 67 |     '''
 68 |     find the wordList for each line, cut the word list according to line
 69 |     :param entireLine: entire lines in line tier
 70 |     :param entirewWordList: entire word lists in pinyin tier
 71 |     :return:
 72 |     nestedWordLists: [[line0, wordList0], [line1, wordList1], ...]
 73 |     numLines: sum of number of lines
 74 |     numWords: sum of number of words
 75 |     '''
 76 |     nestedWordLists     = []
 77 |     numLines            = 0
 78 |     numWords            = 0
 79 | 
 80 |     for line in entireLine:
 81 |         # asciiLine=line[2].encode("ascii", "replace")
 82 |         if len(line[2].replace(" ", "")):                                      # if line is not empty
 83 |             numLines        += 1
 84 |             nestedWordList  = []
 85 |             wordList        = line2WordList(line, entireWordList)
 86 |             for word in wordList:
 87 |                 # asciiWord = word[2].encode("ascii", "replace")
 88 |                 if len(word[2].replace(" ", "")):                              # if word is not empty
 89 |                     numWords += 1
 90 |                     nestedWordList.append(word)
 91 |             nestedWordLists.append([line,nestedWordList])
 92 | 
 93 |     return nestedWordLists, numLines, numWords
 94 | 
 95 | def syllableTextgridExtraction(textgrid_path, recording, tier0, tier1):
 96 | 
 97 |     '''
 98 |     Extract syllable boundary and phoneme boundary from textgrid
 99 |     :param textgrid_path:
100 |     :param recording:
101 |     :param tier0: parent tier
102 |     :param tier1: child tier which should be covered by parent tier
103 |     :return:
104 |     nestedPhonemeList, element[0] - syllable, element[1] - a list containing the phoneme of the syllable
105 |     '''
106 | 
107 |     textgrid_file   = os.path.join(textgrid_path,recording+'.TextGrid')
108 | 
109 |     syllableList    = textGrid2WordList(textgrid_file, whichTier=tier0)
110 |     phonemeList     = textGrid2WordList(textgrid_file, whichTier=tier1)
111 | 
112 |     # parse syllables of groundtruth
113 |     nestedPhonemeLists, numSyllables, numPhonemes   = wordListsParseByLines(syllableList, phonemeList)
114 | 
115 |     return nestedPhonemeLists, numSyllables, numPhonemes
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/neural_net/utils/textgrid_preprocessing.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from neural_net.utils.textgridParser import textGrid2WordList
 3 | from neural_net.utils.textgridParser import wordListsParseByLines
 4 | 
 5 | 
 6 | def parse_syllable_line_list(ground_truth_text_grid_file, parent_tier, child_tier):
 7 | 
 8 |     if not os.path.isfile(ground_truth_text_grid_file):
 9 |         is_file_exist = False
10 |         return False, is_file_exist, False
11 |     else:
12 |         is_file_exist = True
13 | 
14 |         # parse line
15 |         line_list, _ = textGrid2WordList(ground_truth_text_grid_file, whichTier=parent_tier)
16 | 
17 |         # parse syllable
18 |         syllable_list, is_syllable_found = textGrid2WordList(ground_truth_text_grid_file, whichTier=child_tier)
19 | 
20 |         # parse lines of ground truth
21 |         nested_syllable_lists, _, _ = wordListsParseByLines(line_list, syllable_list)
22 | 
23 |         return nested_syllable_lists, is_file_exist, is_syllable_found
24 | 


--------------------------------------------------------------------------------
/neural_net/utils/utils_functions.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def smooth_obs(obs):
 5 |     """
 6 |     hanning window smooth the onset observation function
 7 |     :param obs: syllable/phoneme onset function
 8 |     :return:
 9 |     """
10 |     hann = np.hanning(5)
11 |     hann /= np.sum(hann)
12 | 
13 |     obs = np.convolve(hann, obs, mode='same')
14 | 
15 |     return obs


--------------------------------------------------------------------------------
/neural_net/viterbiDecodingPhonemeSeg.pyx:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.stats import norm
  3 | from neural_net.parameters import hopsize_t
  4 | cimport cython
  5 | 
# Machine epsilon of the float type; used in this module as a floor that
# replaces exact zeros before taking logarithms.
value_eps = np.finfo(float).eps
  7 | 
  8 | def FdurationProba2( syllable_duration, param_s ):
  9 | 
 10 |     M1      = syllable_duration/hopsize_t
 11 | 
 12 |     # % delta
 13 |     if param_s['delta_mode'] == 'constant':
 14 |         delta   = param_s['delta']
 15 |     elif param_s['delta_mode'] == 'proportion':
 16 |         delta   = syllable_duration * param_s['delta']
 17 |     else:
 18 |         raise('Error: delta_default should be either constant or proportion.')
 19 |     S1          = delta/hopsize_t
 20 | 
 21 |     # % duration max is three times of standard deviation
 22 |     duration_max = syllable_duration + 3.0*delta
 23 | 
 24 |     tmin = 0
 25 | 
 26 |     tmax = int(duration_max/hopsize_t)
 27 | 
 28 |     # Ps = pdf('Normal',(tmin : tmax), M1, S1)
 29 |     x = range(tmin, tmax)
 30 |     Ps = norm.pdf(x, M1, S1)
 31 |     return Ps, tmin, tmax
 32 | 
# Cython directives: C division semantics, no bounds checking and no negative
# index wrapping on the typed memoryview accesses below.
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def viterbiSegmental2(P, sd, param_s):
    """
    Segmental Viterbi decoding of boundary positions.

    Candidate boundaries are the indices where P exceeds machine epsilon.
    For each of the T entries of sd, one candidate is chosen such that the
    sum of log emission (log P at the candidate) and log duration probability
    (from FdurationProba2, indexed by the distance between consecutive
    candidates) is maximal. The path is forced to end on the last candidate.

    :param P: emission probabilities; exact zeros are overwritten IN PLACE
        with machine epsilon.
        NOTE(review): originally documented as NxT, but it is bound to a 1-D
        contiguous double memoryview below and only the first index array of
        np.where is used, so it appears to be 1-D - confirm with callers.
    :param sd: sequence of T score durations, one per segment
    :param param_s: duration-model parameters forwarded to FdurationProba2
    :return: list of T + 1 entries of the candidate index array, obtained by
        backtracking from the last candidate
    """

    # prevent singularities (log(0)); note this mutates the caller's array
    P[P == 0]     = value_eps

    # candidate boundary positions: everything above the epsilon floor
    i_bound = np.where(P > value_eps)[0]
    N = len(i_bound)
    T = len(sd)
    
    # log - likelihood
    # delta[j, t]: best score of paths ending on candidate j at step t
    # psi[j, t]:   index of the predecessor candidate (stored as double)
    delta   = np.zeros((N, T), dtype=np.double)
    psi     = np.zeros((N, T), dtype=np.double)
    logP    = np.log(P, dtype=np.double)

    # duration probability
    Ps, _, _  = FdurationProba2(sd[0], param_s)
    Ps[Ps == 0]     = value_eps
    C               = len(Ps)
    logPs           = np.log(Ps, dtype=np.double)

    # typed memoryviews over the numpy buffers for fast element access
    cdef double [:, ::1] cdelta = delta
    cdef double [:, ::1] cpsi   = psi
    cdef double [::1] clogP     = logP
    cdef double [::1] clogPs    = logPs
    cdef int [::1] ci_bound     = np.array(i_bound, dtype=np.intc)
    # % % % % % % % % % % % % % % % % % %
    # % Initialisation %
    # % % % % % % % % % % % % % % % % % %
    
    # % not a possible transition from > 0 time to 1
    cdelta[0, 0]     = -np.inf
    cpsi[:,0]        = 0
    for jj in range(1,N):
        # distance of candidate jj from the first candidate
        d = ci_bound[jj] - ci_bound[0]
        # print(jj, i_bound[jj], d, C)
        if d >= C:
            # longer than the duration model supports
            cdelta[jj, 0] = -np.inf
        else:
            cdelta[jj, 0] = clogPs[d] + clogP[i_bound[jj]]

    # drop the view on the first duration model
    clogPs = None
    
    # % % % % % % % % % % % % % % % % % %
    # % Recursion %
    # % % % % % % % % % % % % % % % % % %
    # scratch buffer: score of reaching the current candidate from each ii
    delta_current = np.zeros((N,), dtype=np.double)
    cdef double [::1] cdelta_current = delta_current

    # steps 1 .. T-2; the last step is handled separately after the loop
    for t in range(1,T - 1):
        # print(t)
        # % duration probability
        Ps, _, _        = FdurationProba2(sd[t], param_s)
        Ps[Ps == 0]     = value_eps
        C               = len(Ps)
        logPs           = np.log(Ps, dtype=np.double)

        for jj in range(N):
            for ii in range(N):
                # print(i_bound, jj, ii)
                d = ci_bound[jj] - ci_bound[ii]
                # print(d, C)
                # predecessors must lie strictly before jj and within the
                # range covered by the duration model
                if d >= C or d <= 0:
                    cdelta_current[ii] = -np.inf
                else:
                    cdelta_current[ii] = cdelta[ii, t - 1] + logPs[d]

            I_delta             = np.argmax(cdelta_current)
            M_delta             = cdelta_current[I_delta]
            cdelta[jj, t]        = M_delta + clogP[i_bound[jj]] # add emission because it's a constant
            cpsi[jj, t]          = I_delta
    
    # % duration probability
    Ps, tmin, tmax  = FdurationProba2(sd[T-1], param_s)
    Ps[Ps == 0]     = value_eps
    C               = len(Ps)
    logPs           = np.log(Ps, dtype=np.double)
    clogPs          = logPs
    # delta_current   = np.zeros((N,))

    # termination: the last step is only evaluated for the last candidate
    # NOTE(review): with T == 1 the access cdelta[ii, T-2] below uses column
    # -1 while wraparound is disabled - confirm callers always pass T >= 2.
    for ii in range(N):
        d = ci_bound[N-1] - ci_bound[ii]
        if d >= C or d <= 0:
            cdelta_current[ii] = -np.inf
        else:
            cdelta_current[ii] = cdelta[ii, T-2] + clogPs[d]

    I_delta             = np.argmax(cdelta_current)
    M_delta             = cdelta_current[I_delta] # the posterior proba
    cdelta[N-1, T-1]        = M_delta + clogP[i_bound[N-1]]
    cpsi[N-1, T-1]          = I_delta

    # % % % % % % % % % % % % % % % % % %
    # % Backtrack %
    # % % % % % % % % % % % % % % % % % %
    # T + 1 boundaries delimit T segments; the last one is fixed to N-1
    i_best_sequence = np.zeros((T+1,),dtype=int)
    # print(i_best_sequence)
    i_best_sequence[T] = N-1
    for t in range(T)[::-1]:
        # print(t+1, i_best_sequence[t+1])
        # follow the stored back-pointer (kept as double, hence the int cast)
        i_best_sequence[t] = int(cpsi[int(i_best_sequence[t + 1]), t])
    # print(i_best_sequence)
    # map candidate indices back to positions in P
    i_boundary = [i_bound[ii] for ii in i_best_sequence]

    # drop the memoryview references before returning
    cdelta          = None
    cdelta_current  = None
    clogPs          = None
    clogP           = None
    cpsi            = None
    ci_bound        = None

    return i_boundary
153 | 


--------------------------------------------------------------------------------