├── .Zap.0.align.py ├── Calign.py.bak ├── Calign2mlf.py ├── README.md ├── mlf2txtgrid.py ├── run ├── Calign2textgrid.py ├── MissingWords └── model │ ├── 8000 │ ├── config │ ├── hmmdefs │ └── macros │ ├── 16000 │ ├── config │ ├── hmmdefs │ └── macros │ ├── dict │ ├── monophones │ └── puncs └── test ├── .Zap.0.puncs ├── MissingWords ├── dict ├── o.test_16000.align ├── o.test_8000.align ├── puncs ├── res1 ├── test1.TextGrid ├── test1.mlf ├── test1.txt ├── test1.wav ├── test_16000.align ├── test_16000.txt ├── test_16000.wav ├── test_8000.align ├── test_8000.txt └── test_8000.wav /.Zap.0.align.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ Usage: 4 | align.py wavfile trsfile output_file 5 | """ 6 | 7 | import os 8 | import sys 9 | import wave 10 | import codecs 11 | 12 | def prep_wav(orig_wav, out_wav): 13 | f = wave.open(orig_wav, 'r') 14 | SR = f.getframerate() 15 | f.close() 16 | if (SR <> 16000): 17 | os.system("sox " + orig_wav + " -r 16000 " + out_wav + " polyphase") 18 | else: 19 | os.system("cp -f " + orig_wav + " " + out_wav) 20 | 21 | 22 | def prep_mlf(trsfile, mlffile): 23 | 24 | f = codecs.open('./model/dict', 'r', 'utf-8') 25 | lines = f.readlines() 26 | f.close() 27 | dict = [] 28 | for line in lines: 29 | dict.append(line.split()[0]) 30 | 31 | f = codecs.open(trsfile, 'r', 'utf-8') 32 | lines = f.readlines() 33 | f.close() 34 | 35 | fw = codecs.open(mlffile, 'w', 'utf-8') 36 | fw.write('#!MLF!#\n') 37 | fw.write('"*/tmp.lab"\n') 38 | fw.write('sp\n') 39 | i = 0 40 | while (i < len(lines)): 41 | txt = lines[i].replace('\n', '') 42 | txt = txt.replace('{breath}', 'br').replace('{noise}', 'ns') 43 | txt = txt.replace('{laugh}', 'lg').replace('{laughter}', 'lg') 44 | txt = txt.replace('{cough}', 'cg').replace('{lipsmack}', 'ls') 45 | for pun in [',', '.', ':', ';', '!', '?', '"', '%', '-']: 46 | txt = txt.replace(pun, '') 47 | for wrd in txt.split(): 48 | if (wrd in dict): 49 | 
fw.write(wrd + '\n') 50 | fw.write('sp\n') 51 | i += 1 52 | fw.write('.\n') 53 | fw.close() 54 | 55 | 56 | def TextGrid(infile1, infile2, outfile): 57 | 58 | f = codecs.open(infile1, 'r', 'utf-8') 59 | lines = f.readlines() 60 | f.close() 61 | 62 | f = codecs.open(infile2, 'r', 'utf-8') 63 | lines2 = f.readlines() 64 | f.close() 65 | words = [] 66 | for line in lines2[2:-1]: 67 | if (line.strip() <> 'sp'): 68 | words.append(line.strip()) 69 | words.reverse() 70 | 71 | fw = codecs.open(outfile, 'w', 'utf-8') 72 | 73 | j = 2 74 | phons = [] 75 | wrds = [] 76 | while (lines[j] <> '.\n'): 77 | ph = lines[j].split()[2] 78 | st = float(lines[j].split()[0])/10000000.0 + 0.0125 79 | en = float(lines[j].split()[1])/10000000.0 + 0.0125 80 | if (st <> en): 81 | phons.append([ph, st, en]) 82 | 83 | if (len(lines[j].split()) == 5): 84 | wrd = lines[j].split()[4].replace('\n', '') 85 | st = float(lines[j].split()[0])/10000000.0 + 0.0125 86 | en = float(lines[j].split()[1])/10000000.0 + 0.0125 87 | if (st <> en): 88 | wrds.append([wrd, st]) 89 | j += 1 90 | 91 | #write the phone interval tier 92 | fw.write('File type = "ooTextFile short"\n') 93 | fw.write('"TextGrid"\n') 94 | fw.write('\n') 95 | fw.write(str(phons[0][1]) + '\n') 96 | fw.write(str(phons[-1][2]) + '\n') 97 | fw.write('\n') 98 | fw.write('2\n') 99 | fw.write('"IntervalTier"\n') 100 | fw.write('"phone"\n') 101 | fw.write(str(phons[0][1]) + '\n') 102 | fw.write(str(phons[-1][-1]) + '\n') 103 | fw.write(str(len(phons)) + '\n') 104 | for k in range(len(phons)): 105 | fw.write(str(phons[k][1]) + '\n') 106 | fw.write(str(phons[k][2]) + '\n') 107 | fw.write('"' + phons[k][0] + '"' + '\n') 108 | 109 | #write the word interval tier 110 | fw.write('"IntervalTier"\n') 111 | fw.write('"word"\n') 112 | fw.write(str(phons[0][1]) + '\n') 113 | fw.write(str(phons[-1][-1]) + '\n') 114 | fw.write(str(len(wrds)) + '\n') 115 | for k in range(len(wrds) - 1): 116 | fw.write(str(wrds[k][1]) + '\n') 117 | fw.write(str(wrds[k+1][1]) + '\n') 
118 | if (wrds[k][0] == 'sp'): 119 | fw.write('"sp"\n') 120 | else: 121 | w = words.pop() 122 | fw.write('"' + w + '"\n') 123 | fw.write(str(wrds[-1][1]) + '\n') 124 | fw.write(str(phons[-1][2]) + '\n') 125 | if (wrds[-1][0] == 'sp'): 126 | fw.write('"sp"\n') 127 | else: 128 | w = words.pop() 129 | fw.write('"' + w + '"\n') 130 | 131 | if (len(words) <> 0): 132 | print words 133 | print '!!!words and phones are mismatched!!!' 134 | fw.close() 135 | 136 | if __name__ == '__main__': 137 | 138 | try: 139 | wavfile = sys.argv[1] 140 | trsfile = sys.argv[2] 141 | outfile = sys.argv[3] 142 | except IndexError: 143 | print __doc__ 144 | 145 | 146 | # create working directory 147 | os.system("rm -r -f ./tmp") 148 | os.system("mkdir ./tmp") 149 | 150 | #prepare wavefile 151 | prep_wav(wavfile, './tmp/tmp.wav') 152 | 153 | #prepare mlfile 154 | prep_mlf(trsfile, './tmp/tmp.mlf') 155 | 156 | #prepare scp files 157 | fw = open('./tmp/codetr.scp', 'w') 158 | fw.write('./tmp/tmp.wav ./tmp/tmp.plp\n') 159 | fw.close() 160 | fw = open('./tmp/test.scp', 'w') 161 | fw.write('./tmp/tmp.plp\n') 162 | fw.close() 163 | 164 | #call plp.sh and align.sh 165 | os.system('HCopy -T 1 -C ./model/16000/config -S ./tmp/codetr.scp') 166 | os.system('HVite -T 1 -a -m -I ./tmp/tmp.mlf -H ./model/16000/macros -H ./model/16000/hmmdefs -S ./tmp/test.scp -i ./tmp/aligned.mlf -p 0.0 -s 5.0 ./model/dict ./model/monophones > ./tmp/aligned.results') 167 | os.path.split(trsfile)[1].split('.')[0] + '.TextGrid' 168 | 169 | TextGrid('./tmp/aligned.mlf', './tmp/tmp.mlf', outfile) 170 | -------------------------------------------------------------------------------- /Calign.py.bak: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ Usage: 4 | Calign.py [options] wavfile trsfile output_file 5 | where options may include: 6 | -r sampling_rate -- override which sampling rate model to use, either 8000 or 16000 7 | -a user_supplied_dictionary -- encoded 
in utf8, the dictionary will be combined with the dictionary in the model 8 | -d user_supplied_dictionary -- encoded in utf8, the dictionary will be used alone, NOT combined with the dictionary in the model 9 | -p punctuations -- encoded in utf8, punctuations and other symbols in this file will be deleted in forced alignment, the default is to use "puncs" in the model 10 | """ 11 | 12 | import os 13 | import sys 14 | import getopt 15 | import wave 16 | import codecs 17 | import io 18 | 19 | HOMEDIR = '/Users/xuchenzi/Desktop/Forced_Aligner_Mandarin' 20 | MODEL_DIR = HOMEDIR + '/model' 21 | 22 | missing = io.open('MissingWords', 'w', encoding='utf8') 23 | 24 | def prep_mlf(trsfile, tmpbase): 25 | 26 | f = codecs.open(tmpbase + '.dict', 'r', 'utf-8') 27 | lines = f.readlines() 28 | f.close() 29 | dict = [] 30 | for line in lines: 31 | dict.append(line.split()[0]) 32 | f = codecs.open(tmpbase + '.puncs', 'r', 'utf-8') 33 | lines = f.readlines() 34 | f.close() 35 | puncs = [] 36 | for line in lines: 37 | puncs.append(line.strip()) 38 | 39 | f = codecs.open(trsfile, 'r', 'utf-8') 40 | lines = f.readlines() 41 | f.close() 42 | 43 | fw = codecs.open(tmpbase + '.mlf', 'w', 'utf-8') 44 | fw.write('#!MLF!#\n') 45 | fw.write('"' + tmpbase + '.lab"\n') 46 | fw.write('sp\n') 47 | i = 0 48 | unks = set() 49 | while (i < len(lines)): 50 | txt = lines[i].replace('\n', '') 51 | txt = txt.replace('{breath}', 'br').replace('{noise}', 'ns') 52 | txt = txt.replace('{laugh}', 'lg').replace('{laughter}', 'lg') 53 | txt = txt.replace('{cough}', 'cg').replace('{lipsmack}', 'ls') 54 | for pun in puncs: 55 | txt = txt.replace(pun, '') 56 | for wrd in txt.split(): 57 | if (wrd in dict): 58 | fw.write(wrd + '\n') 59 | fw.write('sp\n') 60 | else: 61 | unks.add(wrd) 62 | i += 1 63 | fw.write('.\n') 64 | fw.close() 65 | return unks 66 | 67 | 68 | def gen_res(infile1, infile2, outfile): 69 | 70 | f = codecs.open(infile1, 'r', 'utf-8') 71 | lines = f.readlines() 72 | f.close() 73 | 74 | f = 
codecs.open(infile2, 'r', 'utf-8') 75 | lines2 = f.readlines() 76 | f.close() 77 | words = [] 78 | for line in lines2[2:-1]: 79 | if (line.strip() != 'sp'): 80 | words.append(line.strip()) 81 | words.reverse() 82 | 83 | fw = codecs.open(outfile, 'w', 'utf-8') 84 | fw.write(lines[0]) 85 | fw.write(lines[1]) 86 | for line in lines[2:-1]: 87 | if ((line.split()[-1].strip() == 'sp') or (len(line.split()) != 5)): 88 | fw.write(line) 89 | else: 90 | fw.write(line.split()[0] + ' ' + line.split()[1] + ' ' + line.split()[2] + ' ' + line.split()[3] + ' ' + words.pop() + '\n') 91 | fw.write(lines[-1]) 92 | 93 | def getopt2(name, opts, default = None) : 94 | value = [v for n,v in opts if n==name] 95 | if len(value) == 0 : 96 | return default 97 | return value[0] 98 | 99 | if __name__ == '__main__': 100 | 101 | try: 102 | opts, args = getopt.getopt(sys.argv[1:], "r:a:d:p:") 103 | 104 | # get the three mandatory arguments 105 | wavfile, trsfile, outfile = args 106 | # get options 107 | sr_override = getopt2("-r", opts) 108 | dict_add = getopt2("-a", opts) 109 | dict_alone = getopt2("-d", opts) 110 | puncs = getopt2("-p", opts) 111 | 112 | except: 113 | print __doc__ 114 | sys.exit(0) 115 | 116 | tmpbase = '/tmp/' + os.environ['USER'] + '_' + str(os.getpid()) 117 | 118 | #find sampling rate and prepare wavefile 119 | if sr_override: 120 | SR = int(sr_override) 121 | os.system('sox ' + wavfile + ' -r ' + str(SR) + ' ' + tmpbase + '.wav') 122 | else: 123 | f = wave.open(wavfile, 'r') 124 | SR = f.getframerate() 125 | f.close() 126 | if (SR not in [8000, 16000]): 127 | os.system('sox ' + wavfile + ' -r 16000 ' + tmpbase + '.wav') 128 | SR = 16000 129 | else: 130 | os.system('cp -f ' + wavfile + ' ' + tmpbase + '.wav') 131 | 132 | #prepare plpfile 133 | os.system('HCopy -C ' + MODEL_DIR + '/' + str(SR) + '/config ' + tmpbase + '.wav ' + tmpbase + '.plp') 134 | 135 | #prepare mlfile and dictionary 136 | if dict_alone: 137 | f = codecs.open(dict_alone, 'r', 'utf-8') 138 | lines = 
f.readlines() 139 | f.close() 140 | lines = lines + ['sp sp\n'] 141 | else: 142 | f = codecs.open(MODEL_DIR + '/dict', 'r', 'utf-8') 143 | lines = f.readlines() 144 | f.close() 145 | if (dict_add): 146 | f = codecs.open(dict_add, 'r', 'utf-8') 147 | lines2 = f.readlines() 148 | f.close() 149 | lines = lines + lines2 150 | fw = codecs.open(tmpbase + '.dict', 'w', 'utf-8') 151 | for line in lines: 152 | fw.write(line) 153 | 154 | if puncs: 155 | os.system('cp -f ' + puncs + ' ' + tmpbase + '.puncs') 156 | else: 157 | os.system('cp -f ' + MODEL_DIR + '/puncs ' + tmpbase + '.puncs') 158 | 159 | unks = prep_mlf(trsfile, tmpbase) 160 | for unk in unks: 161 | missing.write(u'Missing: ' + unk + '\n') 162 | 163 | #run alignment 164 | os.system('HVite -T 1 -a -m -t 10000.0 10000.0 100000.0 -I ' + tmpbase + '.mlf -H ' + MODEL_DIR + '/' + str(SR) + '/macros -H ' + MODEL_DIR + '/' + str(SR) + '/hmmdefs -i ' + tmpbase + '.aligned' + ' ' + tmpbase + '.dict ' + MODEL_DIR + '/monophones ' + tmpbase + '.plp' + ' > ' + tmpbase + '.results') 165 | 166 | gen_res(tmpbase + '.aligned', tmpbase + '.mlf', outfile) 167 | 168 | #clean up 169 | os.system('rm -f ' + tmpbase + '*') 170 | -------------------------------------------------------------------------------- /Calign2mlf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ Usage: 4 | Calign.py [options] wavfile trsfile output_file 5 | where options may include: 6 | -r sampling_rate -- override which sampling rate model to use, either 8000 or 16000 7 | -a user_supplied_dictionary -- encoded in utf8, the dictionary will be combined with the dictionary in the model 8 | -d user_supplied_dictionary -- encoded in utf8, the dictionary will be used alone, NOT combined with the dictionary in the model 9 | -p punctuations -- encoded in utf8, punctuations and other symbols in this file will be deleted in forced alignment, the default is to use "puncs" in the model 10 | """ 11 | 12 | 
import os 13 | import sys 14 | import getopt 15 | import wave 16 | import codecs 17 | import io 18 | 19 | HOMEDIR = '/Users/xuchenzi/Documents/phonetics/P2FA_Mandarin/run' 20 | MODEL_DIR = HOMEDIR + '/model' 21 | 22 | missing = io.open('MissingWords', 'w', encoding='utf8') 23 | 24 | def prep_mlf(trsfile, tmpbase): 25 | 26 | f = codecs.open(tmpbase + '.dict', 'r', 'utf-8') 27 | lines = f.readlines() 28 | f.close() 29 | dict = [] 30 | for line in lines: 31 | dict.append(line.split()[0]) 32 | f = codecs.open(tmpbase + '.puncs', 'r', 'utf-8') 33 | lines = f.readlines() 34 | f.close() 35 | puncs = [] 36 | for line in lines: 37 | puncs.append(line.strip()) 38 | 39 | f = codecs.open(trsfile, 'r', 'utf-8') 40 | lines = f.readlines() 41 | f.close() 42 | 43 | fw = codecs.open(tmpbase + '.mlf', 'w', 'utf-8') 44 | fw.write('#!MLF!#\n') 45 | fw.write('"' + tmpbase + '.lab"\n') 46 | fw.write('sp\n') 47 | i = 0 48 | unks = set() 49 | while (i < len(lines)): 50 | txt = lines[i].replace('\n', '') 51 | txt = txt.replace('{breath}', 'br').replace('{noise}', 'ns') 52 | txt = txt.replace('{laugh}', 'lg').replace('{laughter}', 'lg') 53 | txt = txt.replace('{cough}', 'cg').replace('{lipsmack}', 'ls') 54 | for pun in puncs: 55 | txt = txt.replace(pun, '') 56 | for wrd in txt.split(): 57 | if (wrd in dict): 58 | fw.write(wrd + '\n') 59 | fw.write('sp\n') 60 | else: 61 | unks.add(wrd) 62 | i += 1 63 | fw.write('.\n') 64 | fw.close() 65 | return unks 66 | 67 | 68 | def gen_res(infile1, infile2, outfile): 69 | 70 | f = codecs.open(infile1, 'r', 'utf-8') 71 | lines = f.readlines() 72 | f.close() 73 | 74 | f = codecs.open(infile2, 'r', 'utf-8') 75 | lines2 = f.readlines() 76 | f.close() 77 | words = [] 78 | for line in lines2[2:-1]: 79 | if (line.strip() != 'sp'): 80 | words.append(line.strip()) 81 | words.reverse() 82 | 83 | fw = codecs.open(outfile, 'w', 'utf-8') 84 | fw.write(lines[0]) 85 | fw.write(lines[1]) 86 | for line in lines[2:-1]: 87 | if ((line.split()[-1].strip() == 'sp') or 
(len(line.split()) != 5)): 88 | fw.write(line) 89 | else: 90 | fw.write(line.split()[0] + ' ' + line.split()[1] + ' ' + line.split()[2] + ' ' + line.split()[3] + ' ' + words.pop() + '\n') 91 | fw.write(lines[-1]) 92 | 93 | def getopt2(name, opts, default = None) : 94 | value = [v for n,v in opts if n==name] 95 | if len(value) == 0 : 96 | return default 97 | return value[0] 98 | 99 | if __name__ == '__main__': 100 | 101 | try: 102 | opts, args = getopt.getopt(sys.argv[1:], "r:a:d:p:") 103 | 104 | # get the three mandatory arguments 105 | wavfile, trsfile, outfile = args 106 | # get options 107 | sr_override = getopt2("-r", opts) 108 | dict_add = getopt2("-a", opts) 109 | dict_alone = getopt2("-d", opts) 110 | puncs = getopt2("-p", opts) 111 | 112 | except: 113 | print(__doc__) 114 | sys.exit(0) 115 | 116 | tmpbase = '/tmp/' + os.environ['USER'] + '_' + str(os.getpid()) 117 | 118 | #find sampling rate and prepare wavefile 119 | if sr_override: 120 | SR = int(sr_override) 121 | os.system('sox ' + wavfile + ' -r ' + str(SR) + ' ' + tmpbase + '.wav') 122 | else: 123 | f = wave.open(wavfile, 'r') 124 | SR = f.getframerate() 125 | f.close() 126 | if (SR not in [8000, 16000]): 127 | os.system('sox ' + wavfile + ' -r 16000 ' + tmpbase + '.wav') 128 | SR = 16000 129 | else: 130 | os.system('cp -f ' + wavfile + ' ' + tmpbase + '.wav') 131 | 132 | #prepare plpfile 133 | os.system('HCopy -C ' + MODEL_DIR + '/' + str(SR) + '/config ' + tmpbase + '.wav ' + tmpbase + '.plp') 134 | 135 | #prepare mlfile and dictionary 136 | if dict_alone: 137 | f = codecs.open(dict_alone, 'r', 'utf-8') 138 | lines = f.readlines() 139 | f.close() 140 | lines = lines + ['sp sp\n'] 141 | else: 142 | f = codecs.open(MODEL_DIR + '/dict', 'r', 'utf-8') 143 | lines = f.readlines() 144 | f.close() 145 | if (dict_add): 146 | f = codecs.open(dict_add, 'r', 'utf-8') 147 | lines2 = f.readlines() 148 | f.close() 149 | lines = lines + lines2 150 | fw = codecs.open(tmpbase + '.dict', 'w', 'utf-8') 151 | for line 
in lines: 152 | fw.write(line) 153 | 154 | if puncs: 155 | os.system('cp -f ' + puncs + ' ' + tmpbase + '.puncs') 156 | else: 157 | os.system('cp -f ' + MODEL_DIR + '/puncs ' + tmpbase + '.puncs') 158 | 159 | unks = prep_mlf(trsfile, tmpbase) 160 | for unk in unks: 161 | missing.write('Missing: ' + unk + '\n') 162 | 163 | #run alignment 164 | os.system('HVite -T 1 -a -m -t 10000.0 10000.0 100000.0 -I ' + tmpbase + '.mlf -H ' + MODEL_DIR + '/' + str(SR) + '/macros -H ' + MODEL_DIR + '/' + str(SR) + '/hmmdefs -i ' + tmpbase + '.aligned' + ' ' + tmpbase + '.dict ' + MODEL_DIR + '/monophones ' + tmpbase + '.plp' + ' > ' + tmpbase + '.results') 165 | 166 | gen_res(tmpbase + '.aligned', tmpbase + '.mlf', outfile) 167 | 168 | #clean up 169 | os.system('rm -f ' + tmpbase + '*') 170 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # P2FA_Mandarin_py3 2 | Modified Python3 P2FA for Mandarin 3 | 4 | ## Quick Start: 5 | 6 | If you have installed these: 7 | - [x] Python 3 8 | - [x] HTK 3.4 9 | - [x] SoX (Sound eXchange) 10 | 11 | Then you can: 12 | 1. Clone this repository 13 | 2. Add a `filename.wav` file and its corresponding transcript `filename.txt` file into the `/run` directory. 14 | 3. Open `Calign2textgrid.py` in your editor and **modify** the path of your `/run` folder in line 21 `HOMEDIR = ` (You can find the path by dragging the folder into the Terminal on a Mac) 15 | 4. In Terminal, navigate to the `/run` directory: 16 | 17 | ``` 18 | $ python Calign2textgrid.py filename.wav filename.txt filename.Textgrid 19 | ``` 20 | The output `filename.Textgrid` is the corresponding time-aligned TextGrid file.
21 | 22 | #### A detailed step-by-step guide: https://chenzixu.rbind.io/resources/forcedalignment/ 23 | -------------------------------------------------------------------------------- /mlf2txtgrid.py: -------------------------------------------------------------------------------- 1 | # adapted from Jiahong Yuan by Chenzi Xu in Dec 2019 2 | # Use: python mlf2txtgrid.py (input and output paths are set in the __main__ section) 3 | import tempfile 4 | import codecs 5 | 6 | 7 | def readAlignedMLF(mlffile, SR, wave_start): 8 | # This reads a MLF alignment output file with phone and word 9 | # alignments and returns a list of words, each word is a list containing 10 | # the word label followed by the phones, each phone is a tuple 11 | # (phone, start_time, end_time) with times in seconds. 12 | 13 | f = codecs.open(mlffile, 'r', 'utf-8') 14 | lines = [l.rstrip() for l in f.readlines()] 15 | f.close() 16 | 17 | if len(lines) < 3: 18 | raise ValueError("Alignment did not complete succesfully.") 19 | 20 | j = 2 21 | ret = [] 22 | while (lines[j] != '.'): 23 | if (len(lines[j].split()) == 5): # Is this the start of a word; do we have a word label?
24 | # Make a new word list in ret and put the word label at the beginning 25 | wrd = lines[j].split()[4] 26 | ret.append([wrd]) 27 | 28 | # Append this phone to the latest word (sub-)list 29 | ph = lines[j].split()[2] 30 | if (SR == 11025): 31 | st = (float(lines[j].split()[0])/10000000.0 + 0.0125)*(11000.0/11025.0) 32 | en = (float(lines[j].split()[1])/10000000.0 + 0.0125)*(11000.0/11025.0) 33 | else: 34 | st = float(lines[j].split()[0])/10000000.0 + 0.0125 35 | en = float(lines[j].split()[1])/10000000.0 + 0.0125 36 | if st < en: 37 | ret[-1].append([ph, st+wave_start, en+wave_start]) 38 | 39 | j += 1 40 | 41 | return ret 42 | 43 | 44 | def writeTextGrid(outfile, word_alignments): 45 | # make the list of just phone alignments 46 | phons = [] 47 | for wrd in word_alignments: 48 | phons.extend(wrd[1:]) # skip the word label 49 | 50 | # make the list of just word alignments 51 | # we're getting elements of the form: 52 | # ["word label", ["phone1", start, end], ["phone2", start, end], ...] 53 | wrds = [] 54 | for wrd in word_alignments: 55 | # If no phones make up this word, then it was an optional word 56 | # like a pause that wasn't actually realized. 
57 | if len(wrd) == 1: 58 | continue 59 | # word label, first phone start time, last phone end time 60 | wrds.append([wrd[0], wrd[1][1], wrd[-1][2]]) 61 | 62 | # write the phone interval tier 63 | fw = open(outfile, 'w') 64 | fw.write('File type = "ooTextFile short"\n') 65 | fw.write('"TextGrid"\n') 66 | fw.write('\n') 67 | fw.write(str(phons[0][1]) + '\n') 68 | fw.write(str(phons[-1][2]) + '\n') 69 | fw.write('\n') 70 | fw.write('2\n') 71 | fw.write('"IntervalTier"\n') 72 | fw.write('"phone"\n') 73 | fw.write(str(phons[0][1]) + '\n') 74 | fw.write(str(phons[-1][-1]) + '\n') 75 | fw.write(str(len(phons)) + '\n') 76 | for k in range(len(phons)): 77 | fw.write(str(phons[k][1]) + '\n') 78 | fw.write(str(phons[k][2]) + '\n') 79 | fw.write('"' + phons[k][0] + '"' + '\n') 80 | 81 | # write the word interval tier 82 | fw.write('"IntervalTier"\n') 83 | fw.write('"word"\n') 84 | fw.write(str(phons[0][1]) + '\n') 85 | fw.write(str(phons[-1][-1]) + '\n') 86 | fw.write(str(len(wrds)) + '\n') 87 | for k in range(len(wrds) - 1): 88 | fw.write(str(wrds[k][1]) + '\n') 89 | fw.write(str(wrds[k+1][1]) + '\n') 90 | fw.write('"' + wrds[k][0] + '"' + '\n') 91 | 92 | fw.write(str(wrds[-1][1]) + '\n') 93 | fw.write(str(phons[-1][2]) + '\n') 94 | fw.write('"' + wrds[-1][0] + '"' + '\n') 95 | fw.close() 96 | 97 | 98 | if __name__ == "__main__": 99 | # Load MLF file 100 | fname_in = './test1.mlf' 101 | fname_out = './test1mlf.TextGrid' 102 | # Default sampling rate 103 | SR = 11025.0 104 | wav_start = 0.0 105 | word_alignments = readAlignedMLF(fname_in, SR, wav_start) 106 | print(word_alignments) 107 | # Write file 108 | writeTextGrid(fname_out, word_alignments) 109 | -------------------------------------------------------------------------------- /run/Calign2textgrid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # adapted from Jiahong Yuan by Chenzi Xu in Dec 2019 3 | 4 | """ Usage: 5 | Calign.py [options] wavfile trsfile 
output_file 6 | where options may include: 7 | -r sampling_rate -- override which sampling rate model to use, either 8000 or 16000 8 | -a user_supplied_dictionary -- encoded in utf8, the dictionary will be combined with the dictionary in the model 9 | -d user_supplied_dictionary -- encoded in utf8, the dictionary will be used alone, NOT combined with the dictionary in the model 10 | -p punctuations -- encoded in utf8, punctuations and other symbols in this file will be deleted in forced alignment, the default is to use "puncs" in the model 11 | soundfile requirements: mono 16,000Hz 16bits 12 | """ 13 | 14 | import os 15 | import sys 16 | import getopt 17 | import wave 18 | import codecs 19 | import io 20 | 21 | HOMEDIR = '/Users/xuchenzi/Documents/phonetics/P2FA_Mandarin/run' 22 | MODEL_DIR = HOMEDIR + '/model' 23 | 24 | missing = io.open('MissingWords', 'w', encoding='utf8') 25 | 26 | 27 | def prep_mlf(trsfile, tmpbase): 28 | 29 | f = codecs.open(tmpbase + '.dict', 'r', 'utf-8') 30 | lines = f.readlines() 31 | f.close() 32 | dict = [] 33 | for line in lines: 34 | dict.append(line.split()[0]) 35 | f = codecs.open(tmpbase + '.puncs', 'r', 'utf-8') 36 | lines = f.readlines() 37 | f.close() 38 | puncs = [] 39 | for line in lines: 40 | puncs.append(line.strip()) 41 | 42 | f = codecs.open(trsfile, 'r', 'utf-8') 43 | lines = f.readlines() 44 | f.close() 45 | 46 | fw = codecs.open(tmpbase + '.mlf', 'w', 'utf-8') 47 | fw.write('#!MLF!#\n') 48 | fw.write('"' + tmpbase + '.lab"\n') 49 | fw.write('sp\n') 50 | i = 0 51 | unks = set() 52 | while (i < len(lines)): 53 | txt = lines[i].replace('\n', '') 54 | txt = txt.replace('{breath}', 'br').replace('{noise}', 'ns') 55 | txt = txt.replace('{laugh}', 'lg').replace('{laughter}', 'lg') 56 | txt = txt.replace('{cough}', 'cg').replace('{lipsmack}', 'ls') 57 | for pun in puncs: 58 | txt = txt.replace(pun, '') 59 | for wrd in txt.split(): 60 | if (wrd in dict): 61 | fw.write(wrd + '\n') 62 | fw.write('sp\n') 63 | else: 64 | 
unks.add(wrd) 65 | i += 1 66 | fw.write('.\n') 67 | fw.close() 68 | return unks 69 | 70 | 71 | def gen_res(infile1, infile2, outfile): 72 | 73 | f = codecs.open(infile1, 'r', 'utf-8') 74 | lines = f.readlines() 75 | f.close() 76 | 77 | f = codecs.open(infile2, 'r', 'utf-8') 78 | lines2 = f.readlines() 79 | f.close() 80 | words = [] 81 | for line in lines2[2:-1]: 82 | if (line.strip() != 'sp'): 83 | words.append(line.strip()) 84 | words.reverse() 85 | 86 | fw = codecs.open(outfile, 'w', 'utf-8') 87 | fw.write(lines[0]) 88 | fw.write(lines[1]) 89 | for line in lines[2:-1]: 90 | if ((line.split()[-1].strip() == 'sp') or (len(line.split()) != 5)): 91 | fw.write(line) 92 | else: 93 | fw.write(line.split()[0] + ' ' + line.split()[1] + ' ' + line.split() 94 | [2] + ' ' + line.split()[3] + ' ' + words.pop() + '\n') 95 | fw.write(lines[-1]) 96 | 97 | 98 | def getopt2(name, opts, default=None): 99 | value = [v for n, v in opts if n == name] 100 | if len(value) == 0: 101 | return default 102 | return value[0] 103 | 104 | 105 | def readAlignedMLF(mlffile, SR, wave_start): 106 | # This reads a MLFalignment output file with phone and word 107 | # alignments and returns a list of words, each word is a list containing 108 | # the word label followed by the phones, each phone is a tuple 109 | # (phone, start_time, end_time) with times in seconds. 110 | 111 | # f = codecs.open(mlffile, 'r', 'utf-8') 112 | with codecs.open(mlffile, 'r', 'utf-8') as f: 113 | lines = [l.rstrip() for l in f.readlines()] 114 | f.close() 115 | 116 | if len(lines) < 3: 117 | raise ValueError("Alignment did not complete succesfully.") 118 | 119 | j = 2 120 | ret = [] 121 | while (lines[j] != '.'): 122 | if (len(lines[j].split()) == 5): 123 | # Is this the start of a word; do we have a word label? 
124 | # Make a new word list in ret and put the word label at the beginning 125 | wrd = lines[j].split()[4] 126 | ret.append([wrd]) 127 | 128 | # Append this phone to the latest word (sub-)list 129 | ph = lines[j].split()[2] 130 | if (SR == 11025): 131 | st = (float(lines[j].split()[0])/10000000.0 + 0.0125)*(11000.0/11025.0) 132 | en = (float(lines[j].split()[1])/10000000.0 + 0.0125)*(11000.0/11025.0) 133 | else: 134 | st = float(lines[j].split()[0])/10000000.0 + 0.0125 135 | en = float(lines[j].split()[1])/10000000.0 + 0.0125 136 | if st < en: 137 | ret[-1].append([ph, st+wave_start, en+wave_start]) 138 | 139 | j += 1 140 | 141 | return ret 142 | 143 | 144 | def writeTextGrid(outfile, word_alignments): 145 | # make the list of just phone alignments 146 | phons = [] 147 | for wrd in word_alignments: 148 | phons.extend(wrd[1:]) # skip the word label 149 | 150 | # make the list of just word alignments 151 | # we're getting elements of the form: 152 | # ["word label", ["phone1", start, end], ["phone2", start, end], ...] 153 | wrds = [] 154 | for wrd in word_alignments: 155 | # If no phones make up this word, then it was an optional word 156 | # like a pause that wasn't actually realized. 
157 | if len(wrd) == 1: 158 | continue 159 | # word label, first phone start time, last phone end time 160 | wrds.append([wrd[0], wrd[1][1], wrd[-1][2]]) 161 | 162 | # write the phone interval tier 163 | fw = open(outfile, 'w') 164 | fw.write('File type = "ooTextFile short"\n') 165 | fw.write('"TextGrid"\n') 166 | fw.write('\n') 167 | fw.write(str(phons[0][1]) + '\n') 168 | fw.write(str(phons[-1][2]) + '\n') 169 | fw.write('\n') 170 | fw.write('2\n') 171 | fw.write('"IntervalTier"\n') 172 | fw.write('"phone"\n') 173 | fw.write(str(phons[0][1]) + '\n') 174 | fw.write(str(phons[-1][-1]) + '\n') 175 | fw.write(str(len(phons)) + '\n') 176 | for k in range(len(phons)): 177 | fw.write(str(phons[k][1]) + '\n') 178 | fw.write(str(phons[k][2]) + '\n') 179 | fw.write('"' + phons[k][0] + '"' + '\n') 180 | 181 | # write the word interval tier 182 | fw.write('"IntervalTier"\n') 183 | fw.write('"word"\n') 184 | fw.write(str(phons[0][1]) + '\n') 185 | fw.write(str(phons[-1][-1]) + '\n') 186 | fw.write(str(len(wrds)) + '\n') 187 | for k in range(len(wrds) - 1): 188 | fw.write(str(wrds[k][1]) + '\n') 189 | fw.write(str(wrds[k+1][1]) + '\n') 190 | fw.write('"' + wrds[k][0] + '"' + '\n') 191 | 192 | fw.write(str(wrds[-1][1]) + '\n') 193 | fw.write(str(phons[-1][2]) + '\n') 194 | fw.write('"' + wrds[-1][0] + '"' + '\n') 195 | 196 | fw.close() 197 | 198 | 199 | if __name__ == '__main__': 200 | 201 | try: 202 | opts, args = getopt.getopt(sys.argv[1:], "r:a:d:p:") 203 | 204 | # get the three mandatory arguments 205 | wavfile, trsfile, outfile = args 206 | # get options 207 | sr_override = getopt2("-r", opts) 208 | dict_add = getopt2("-a", opts) 209 | dict_alone = getopt2("-d", opts) 210 | puncs = getopt2("-p", opts) 211 | 212 | except: 213 | print(__doc__) 214 | sys.exit(0) 215 | 216 | tmpbase = '/tmp/' + os.environ['USER'] + '_' + str(os.getpid()) 217 | 218 | # find sampling rate and prepare wavefile 219 | if sr_override: 220 | SR = int(sr_override) 221 | os.system('sox ' + wavfile + ' 
-r ' + str(SR) + ' ' + tmpbase + '.wav') 222 | else: 223 | f = wave.open(wavfile, 'r') 224 | SR = f.getframerate() 225 | f.close() 226 | if (SR not in [8000, 16000]): 227 | os.system('sox ' + wavfile + ' -r 16000 ' + tmpbase + '.wav') 228 | SR = 16000 229 | else: 230 | os.system('cp -f ' + wavfile + ' ' + tmpbase + '.wav') 231 | 232 | # prepare plpfile 233 | os.system('HCopy -C ' + MODEL_DIR + '/' + str(SR) + 234 | '/config ' + tmpbase + '.wav ' + tmpbase + '.plp') 235 | 236 | # prepare mlfile and dictionary 237 | if dict_alone: 238 | f = codecs.open(dict_alone, 'r', 'utf-8') 239 | lines = f.readlines() 240 | f.close() 241 | lines = lines + ['sp sp\n'] 242 | else: 243 | f = codecs.open(MODEL_DIR + '/dict', 'r', 'utf-8') 244 | lines = f.readlines() 245 | f.close() 246 | if (dict_add): 247 | f = codecs.open(dict_add, 'r', 'utf-8') 248 | lines2 = f.readlines() 249 | f.close() 250 | lines = lines + lines2 251 | fw = codecs.open(tmpbase + '.dict', 'w', 'utf-8') 252 | for line in lines: 253 | fw.write(line) 254 | 255 | if puncs: 256 | os.system('cp -f ' + puncs + ' ' + tmpbase + '.puncs') 257 | else: 258 | os.system('cp -f ' + MODEL_DIR + '/puncs ' + tmpbase + '.puncs') 259 | 260 | unks = prep_mlf(trsfile, tmpbase) 261 | for unk in unks: 262 | missing.write('Missing: ' + unk + '\n') 263 | 264 | # run alignment 265 | os.system('HVite -T 1 -a -m -t 10000.0 10000.0 100000.0 -I ' + tmpbase + '.mlf -H ' + MODEL_DIR + '/' + str(SR) + '/macros -H ' + MODEL_DIR + '/' + 266 | str(SR) + '/hmmdefs -i ' + tmpbase + '.aligned' + ' ' + tmpbase + '.dict ' + MODEL_DIR + '/monophones ' + tmpbase + '.plp' + ' > ' + tmpbase + '.results') 267 | 268 | gen_res(tmpbase + '.aligned', tmpbase + '.mlf', outfile) 269 | 270 | #output_mlf = tmpbase + '.aligned' 271 | wave_start = '0.0' 272 | 273 | writeTextGrid(outfile, readAlignedMLF(outfile, SR, float(wave_start))) 274 | 275 | # clean up 276 | os.system('rm -f ' + tmpbase + '*') 277 | 
-------------------------------------------------------------------------------- /run/MissingWords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenchenzi/P2FA_Mandarin_py3/57d331f8e218f1c4e4bc19e28064e603b67600fb/run/MissingWords -------------------------------------------------------------------------------- /run/model/16000/config: -------------------------------------------------------------------------------- 1 | # Coding parameters 2 | SOURCEKIND = WAVEFORM 3 | SOURCEFORMAT = WAVE 4 | SOURCERATE = 625.0 5 | TARGETKIND = PLP_0_D_A_Z 6 | TARGETRATE = 100000.0 7 | SAVECOMPRESSED = F 8 | SAVEWITHCRC = F 9 | WINDOWSIZE = 250000.0 10 | ZMEANSOURCE = T 11 | USEHAMMING = T 12 | PREEMCOEF = 0.97 13 | NUMCHANS = 20 14 | LPCORDER = 12 15 | USEPOWER = T 16 | -------------------------------------------------------------------------------- /run/model/16000/macros: -------------------------------------------------------------------------------- 1 | ~o 2 | 1 39 3 | 39 4 | ~v "varFloor1" 5 | 39 6 | 4.881267e-03 5.346371e-03 5.542536e-03 4.809141e-03 5.096380e-03 3.920087e-03 3.509250e-03 3.586160e-03 2.896836e-03 2.508252e-03 2.188610e-03 1.661693e-03 5.940058e-03 3.414730e-04 3.404064e-04 2.781517e-04 3.186265e-04 3.224213e-04 2.834843e-04 2.782966e-04 2.984269e-04 2.451008e-04 2.186016e-04 1.868397e-04 1.407243e-04 2.591136e-04 5.620261e-05 5.492153e-05 4.400828e-05 5.257877e-05 5.543127e-05 5.095266e-05 5.175121e-05 5.600607e-05 4.649820e-05 4.210568e-05 3.566974e-05 2.684142e-05 3.749156e-05 7 | -------------------------------------------------------------------------------- /run/model/8000/config: -------------------------------------------------------------------------------- 1 | # Coding parameters 2 | SOURCEKIND = WAVEFORM 3 | SOURCEFORMAT = WAVE 4 | SOURCERATE = 1250.0 5 | TARGETKIND = PLP_E_D_A_Z 6 | TARGETRATE = 100000.0 7 | SAVECOMPRESSED = T 8 | SAVEWITHCRC = T 9 | WINDOWSIZE = 250000.0 10 | 
ZMEANSOURCE = T 11 | USEHAMMING = T 12 | PREEMCOEF = 0.97 13 | NUMCHANS = 20 14 | LPCORDER = 12 15 | USEPOWER = T 16 | -------------------------------------------------------------------------------- /run/model/8000/macros: -------------------------------------------------------------------------------- 1 | ~o 2 | 1 39 3 | 39 4 | ~v "varFloor1" 5 | 39 6 | 4.043175e-03 3.520668e-03 2.727962e-03 4.756826e-03 2.739920e-03 3.087194e-03 3.056856e-03 2.938626e-03 2.520009e-03 2.086400e-03 1.881010e-03 1.485537e-03 1.096926e-03 1.522320e-04 1.626377e-04 1.445403e-04 2.161604e-04 1.794765e-04 2.068211e-04 2.158113e-04 2.099576e-04 1.904367e-04 1.654311e-04 1.452342e-04 1.209497e-04 2.209249e-05 2.499748e-05 2.595413e-05 2.496012e-05 3.634888e-05 3.246813e-05 3.804211e-05 4.027777e-05 3.951971e-05 3.622150e-05 3.179955e-05 2.769810e-05 2.346084e-05 3.038510e-06 7 | -------------------------------------------------------------------------------- /run/model/monophones: -------------------------------------------------------------------------------- 1 | % 2 | & 3 | > 4 | @ 5 | C 6 | E 7 | I 8 | N 9 | R 10 | S 11 | U 12 | W 13 | Z 14 | a 15 | b 16 | br 17 | c 18 | cg 19 | d 20 | e 21 | f 22 | g 23 | h 24 | i 25 | j 26 | k 27 | l 28 | lg 29 | ls 30 | m 31 | n 32 | ns 33 | o 34 | p 35 | q 36 | r 37 | s 38 | sil 39 | sp 40 | t 41 | u 42 | w 43 | x 44 | y 45 | z 46 | -------------------------------------------------------------------------------- /run/model/puncs: -------------------------------------------------------------------------------- 1 | , 2 | 。 3 | : 4 | ; 5 | ? 6 | ! 7 | 、 8 | , 9 | . 10 | : 11 | ; 12 | ? 13 | ! 
14 | " 15 | % 16 | - 17 | -------------------------------------------------------------------------------- /test/.Zap.0.puncs: -------------------------------------------------------------------------------- 1 | , 2 | : 3 | -------------------------------------------------------------------------------- /test/MissingWords: -------------------------------------------------------------------------------- 1 | Missing: 还还还 2 | -------------------------------------------------------------------------------- /test/dict: -------------------------------------------------------------------------------- 1 | 还还还 h a 2 | -------------------------------------------------------------------------------- /test/o.test_16000.align: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/jiahong_11897.rec" 3 | 0 2500000 sp 974.900818 sp 4 | 2500000 3400000 j 221.419769 经济 5 | 3400000 3800000 i 61.806274 6 | 3800000 4700000 N 107.410400 7 | 4700000 5100000 j 44.406029 8 | 5100000 5600000 i 79.378159 9 | 5600000 5600000 sp -0.076879 sp 10 | 5600000 6400000 z 115.739517 自由 11 | 6400000 7300000 I 179.200150 12 | 7300000 8100000 y 168.535904 13 | 8100000 9600000 o 427.144318 14 | 9600000 9900000 w 28.448969 15 | 9900000 9900000 sp -0.076879 sp 16 | 9900000 10900000 h 145.013367 和 17 | 10900000 12000000 & 261.809143 18 | 12000000 12000000 sp -0.076879 sp 19 | 12000000 13300000 q 271.435455 其他 20 | 13300000 14100000 i 199.070129 21 | 14100000 14600000 t 76.701569 22 | 14600000 15800000 a 274.273590 23 | 15800000 15800000 sp -0.076879 sp 24 | 15800000 16600000 f 158.931137 方面 25 | 16600000 17100000 a 124.600624 26 | 17100000 17400000 N 49.126892 27 | 17400000 17900000 m 100.946114 28 | 17900000 18200000 y 18.591831 29 | 18200000 18600000 E 79.596268 30 | 18600000 18900000 n 33.211811 31 | 18900000 18900000 sp -0.076879 sp 32 | 18900000 19300000 d 61.835171 的 33 | 19300000 19800000 & 99.924805 34 | 19800000 19800000 sp -0.076879 sp 35 | 19800000 
20600000 z 146.982803 自由 36 | 20600000 21500000 I 186.389099 37 | 21500000 22500000 y 258.086823 38 | 22500000 23700000 o 299.957611 39 | 23700000 24300000 w 90.956604 40 | 24300000 27200000 sp 929.874207 sp 41 | 27200000 28300000 m 210.598740 没有 42 | 28300000 28800000 e 131.147903 43 | 28800000 29100000 y 74.190033 44 | 29100000 29500000 y 98.510986 45 | 29500000 30400000 o 225.242081 46 | 30400000 30700000 w 43.950226 47 | 30700000 30700000 sp -0.076879 sp 48 | 30700000 31600000 m 227.060089 明显 49 | 31600000 32500000 i 213.690567 50 | 32500000 33200000 N 88.227440 51 | 33200000 34100000 x 156.143967 52 | 34100000 34600000 y 95.357971 53 | 34600000 35000000 E 89.767639 54 | 35000000 35300000 n 32.689030 55 | 35300000 35300000 sp -0.076879 sp 56 | 35300000 35600000 d 55.840229 的 57 | 35600000 36100000 & 114.320190 58 | 36100000 36100000 sp -0.076879 sp 59 | 36100000 37000000 j 223.540710 界限 60 | 37000000 37300000 y 78.275055 61 | 37300000 38200000 E 311.848206 62 | 38200000 39200000 x 249.330826 63 | 39200000 39500000 y 70.710411 64 | 39500000 40200000 E 170.178101 65 | 40200000 40800000 n 93.546394 66 | 40800000 44600000 sp 1596.034546 sp 67 | . 
68 | -------------------------------------------------------------------------------- /test/o.test_8000.align: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/jiahong_11836.rec" 3 | 0 8600000 sp 2655.220215 sp 4 | 8600000 9300000 o 130.016510 噢 5 | 9300000 9300000 sp -0.132768 sp 6 | 9300000 9800000 n 48.759514 那 7 | 9800000 10400000 a 142.300461 8 | 10400000 10400000 sp -0.132768 sp 9 | 10400000 11700000 i 250.983337 一定 10 | 11700000 12000000 d 41.678020 11 | 12000000 12700000 i 109.374924 12 | 12700000 13000000 N 24.200844 13 | 13000000 13000000 sp -0.132768 sp 14 | 13000000 13300000 S 39.259541 是 15 | 13300000 13600000 % 37.615509 16 | 13600000 13600000 sp -0.132768 sp 17 | 13600000 13900000 y 42.209637 有 18 | 13900000 14300000 o 108.595078 19 | 14300000 14700000 w 76.075394 20 | 14700000 14700000 sp -0.132768 sp 21 | 14700000 15000000 i 39.315922 一 22 | 15000000 15000000 sp -0.132768 sp 23 | 15000000 15400000 Z 95.461388 种 24 | 15400000 16500000 o 296.506165 25 | 16500000 18100000 N 454.212891 26 | 18100000 18100000 sp -0.132768 sp 27 | 18100000 19700000 t 323.293182 特别 28 | 19700000 20000000 & 13.081275 29 | 20000000 20400000 b 60.888443 30 | 20400000 21900000 y 256.355316 31 | 21900000 22400000 E 47.157494 32 | 22400000 22900000 sp 94.957939 sp 33 | 22900000 24200000 t 242.286789 特别 34 | 24200000 24600000 & 45.574703 35 | 24600000 24900000 b 47.007500 36 | 24900000 25200000 y 34.037067 37 | 25200000 27000000 E 380.627563 38 | 27000000 28100000 sp 380.687012 sp 39 | 28100000 28600000 b 113.949776 不 40 | 28600000 29000000 u 39.782444 41 | 29000000 29000000 sp -0.132768 sp 42 | 29000000 29400000 i 68.105499 一样 43 | 29400000 29900000 y 115.556274 44 | 29900000 31100000 a 407.337585 45 | 31100000 31400000 N 61.972122 46 | 31400000 31400000 sp -0.132768 sp 47 | 31400000 31700000 d 30.656164 的 48 | 31700000 32000000 & 49.452637 49 | 32000000 32000000 sp -0.132768 sp 50 | 32000000 32700000 g 87.686653 感受 51 | 
32700000 33300000 @ 108.202339 52 | 33300000 33700000 n 50.980190 53 | 33700000 34200000 S 105.960876 54 | 34200000 34700000 o 121.505157 55 | 34700000 35200000 w 121.629166 56 | 35200000 35200000 sp -0.132768 sp 57 | 35200000 35500000 r 86.166023 让 58 | 35500000 36000000 a 171.147858 59 | 36000000 36300000 N 95.011627 60 | 36300000 36300000 sp -0.132768 sp 61 | 36300000 37200000 r 398.568054 人 62 | 37200000 39300000 & 981.831360 63 | 39300000 39600000 n 51.836002 64 | 39600000 45400000 sp 1947.180908 sp 65 | . 66 | -------------------------------------------------------------------------------- /test/puncs: -------------------------------------------------------------------------------- 1 | , 2 | 。 3 | : 4 | ; 5 | ? 6 | ! 7 | 、 8 | , 9 | . 10 | : 11 | ; 12 | ? 13 | ! 14 | " 15 | % 16 | - 17 | -------------------------------------------------------------------------------- /test/res1: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/jiahong_24072.rec" 3 | 0 1000000 sp 393.559479 sp 4 | 1000000 1300000 h 69.907013 还还还 5 | 1300000 1600000 a 45.189987 6 | 1600000 1900000 sp 107.773193 sp 7 | 1900000 2200000 h 58.900318 还还还 8 | 2200000 2500000 a 21.922968 9 | 2500000 2500000 sp -0.076879 sp 10 | 2500000 3400000 j 221.419800 经济 11 | 3400000 3800000 i 61.806274 12 | 3800000 4700000 N 107.410378 13 | 4700000 5100000 j 44.406025 14 | 5100000 5600000 i 79.378181 15 | 5600000 5600000 sp -0.076879 sp 16 | 5600000 6400000 z 115.739479 自由 17 | 6400000 7300000 I 179.200134 18 | 7300000 8100000 y 168.535812 19 | 8100000 9600000 o 427.144348 20 | 9600000 9900000 w 28.448975 21 | 9900000 9900000 sp -0.076879 sp 22 | 9900000 10900000 h 145.013290 和 23 | 10900000 12000000 & 261.809082 24 | 12000000 12000000 sp -0.076879 sp 25 | 12000000 13300000 q 271.435516 其他 26 | 13300000 14100000 i 199.070053 27 | 14100000 14600000 t 76.701584 28 | 14600000 15800000 a 274.273590 29 | 15800000 15800000 sp -0.076879 sp 30 | 15800000 16600000 f 
158.931122 方面 31 | 16600000 17100000 a 124.600594 32 | 17100000 17400000 N 49.126904 33 | 17400000 17900000 m 100.946121 34 | 17900000 18200000 y 18.591824 35 | 18200000 18600000 E 79.596283 36 | 18600000 18900000 n 33.211811 37 | 18900000 18900000 sp -0.076879 sp 38 | 18900000 19300000 d 61.835182 的 39 | 19300000 19800000 & 99.924820 40 | 19800000 19800000 sp -0.076879 sp 41 | 19800000 20600000 z 146.982819 自由 42 | 20600000 21500000 I 186.389145 43 | 21500000 22500000 y 258.086792 44 | 22500000 23700000 o 299.957642 45 | 23700000 24300000 w 90.956642 46 | 24300000 27200000 sp 929.874329 sp 47 | 27200000 28300000 m 210.598740 没有 48 | 28300000 28800000 e 131.147903 49 | 28800000 29100000 y 74.190041 50 | 29100000 29500000 y 98.510986 51 | 29500000 30400000 o 225.242065 52 | 30400000 30700000 w 43.950230 53 | 30700000 30700000 sp -0.076879 sp 54 | 30700000 31600000 m 227.060074 明显 55 | 31600000 32500000 i 213.690598 56 | 32500000 33200000 N 88.227432 57 | 33200000 34100000 x 156.143631 58 | 34100000 34600000 y 95.357941 59 | 34600000 35000000 E 89.767639 60 | 35000000 35300000 n 32.689037 61 | 35300000 35300000 sp -0.076879 sp 62 | 35300000 35600000 d 55.840237 的 63 | 35600000 36100000 & 114.320183 64 | 36100000 36100000 sp -0.076879 sp 65 | 36100000 37000000 j 223.540710 界限 66 | 37000000 37300000 y 78.275063 67 | 37300000 38200000 E 311.848236 68 | 38200000 39200000 x 249.330688 69 | 39200000 39500000 y 70.710426 70 | 39500000 40200000 E 170.178116 71 | 40200000 40800000 n 93.546394 72 | 40800000 44600000 sp 1596.034424 sp 73 | . 
74 | -------------------------------------------------------------------------------- /test/test1.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile short" 2 | "TextGrid" 3 | 4 | 0.0125 5 | 5.1725 6 | 7 | 2 8 | "IntervalTier" 9 | "phone" 10 | 0.0125 11 | 5.1725 12 | 46 13 | 0.0125 14 | 0.8624999999999999 15 | "sp" 16 | 0.8624999999999999 17 | 0.8925 18 | "n" 19 | 0.8925 20 | 0.9824999999999999 21 | "i" 22 | 0.9824999999999999 23 | 1.0325 24 | "g" 25 | 1.0325 26 | 1.0925 27 | "&" 28 | 1.0925 29 | 1.1424999999999998 30 | "N" 31 | 1.1424999999999998 32 | 1.2525 33 | "x" 34 | 1.2525 35 | 1.2925 36 | "i" 37 | 1.2925 38 | 1.3425 39 | "h" 40 | 1.3425 41 | 1.3924999999999998 42 | "w" 43 | 1.3924999999999998 44 | 1.4625 45 | "@" 46 | 1.4625 47 | 1.4925 48 | "n" 49 | 1.4925 50 | 1.5325 51 | "l" 52 | 1.5325 53 | 1.6325 54 | "a" 55 | 1.6325 56 | 1.6724999999999999 57 | "w" 58 | 1.6724999999999999 59 | 1.7025 60 | "y" 61 | 1.7025 62 | 1.8025 63 | "i" 64 | 1.8025 65 | 1.8425 66 | "N" 67 | 1.8425 68 | 1.9024999999999999 69 | "h" 70 | 1.9024999999999999 71 | 1.9625 72 | "@" 73 | 1.9625 74 | 1.9925 75 | "y" 76 | 1.9925 77 | 2.0925000000000002 78 | "S" 79 | 2.0925000000000002 80 | 2.1425 81 | "%" 82 | 2.1425 83 | 2.1725000000000003 84 | "y" 85 | 2.1725000000000003 86 | 2.3525 87 | "i" 88 | 2.3525 89 | 2.4325 90 | "N" 91 | 2.4325 92 | 2.4625000000000004 93 | "w" 94 | 2.4625000000000004 95 | 2.6025 96 | "u" 97 | 2.6025 98 | 3.1125000000000003 99 | "sp" 100 | 3.1125000000000003 101 | 3.1725000000000003 102 | "w" 103 | 3.1725000000000003 104 | 3.2725 105 | ">" 106 | 3.2725 107 | 3.3325 108 | "g" 109 | 3.3325 110 | 3.4025000000000003 111 | "&" 112 | 3.4025000000000003 113 | 3.4325 114 | "N" 115 | 3.4325 116 | 3.5525 117 | "x" 118 | 3.5525 119 | 3.6125000000000003 120 | "i" 121 | 3.6125000000000003 122 | 3.6825 123 | "h" 124 | 3.6825 125 | 3.7225 126 | "w" 127 | 3.7225 128 | 3.8125 129 | "@" 130 | 3.8125 131 | 3.8725 132 
| "n" 133 | 3.8725 134 | 3.9025000000000003 135 | "y" 136 | 3.9025000000000003 137 | 4.0125 138 | "i" 139 | 4.0125 140 | 4.202500000000001 141 | "N" 142 | 4.202500000000001 143 | 4.2325 144 | "w" 145 | 4.2325 146 | 4.4425 147 | "u" 148 | 4.4425 149 | 5.1725 150 | "sp" 151 | "IntervalTier" 152 | "word" 153 | 0.0125 154 | 5.1725 155 | 14 156 | 0.0125 157 | 0.8624999999999999 158 | "sp" 159 | 0.8624999999999999 160 | 0.9824999999999999 161 | "你" 162 | 0.9824999999999999 163 | 1.1424999999999998 164 | "更" 165 | 1.1424999999999998 166 | 1.4925 167 | "喜欢" 168 | 1.4925 169 | 1.8425 170 | "老鹰" 171 | 1.8425 172 | 1.9925 173 | "还" 174 | 1.9925 175 | 2.1425 176 | "是" 177 | 2.1425 178 | 2.6025 179 | "鹦鹉" 180 | 2.6025 181 | 3.1125000000000003 182 | "sp" 183 | 3.1125000000000003 184 | 3.2725 185 | "我" 186 | 3.2725 187 | 3.4325 188 | "更" 189 | 3.4325 190 | 3.8725 191 | "喜欢" 192 | 3.8725 193 | 4.4425 194 | "鹦鹉" 195 | 4.4425 196 | 5.1725 197 | "sp" 198 | -------------------------------------------------------------------------------- /test/test1.mlf: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/xuchenzi_27944.rec" 3 | 0 8500000 sp 3079.143311 sp 4 | 8500000 8800000 n -1.651408 你 5 | 8800000 9700000 i 73.151802 6 | 9700000 9700000 sp -0.076879 sp 7 | 9700000 10200000 g 11.815635 更 8 | 10200000 10800000 & 27.017082 9 | 10800000 11300000 N 54.116631 10 | 11300000 11300000 sp -0.076879 sp 11 | 11300000 12400000 x 239.354340 喜欢 12 | 12400000 12800000 i -46.893253 13 | 12800000 13300000 h 69.991768 14 | 13300000 13800000 w 98.835899 15 | 13800000 14500000 @ 168.853958 16 | 14500000 14800000 n 65.589119 17 | 14800000 14800000 sp -0.076879 sp 18 | 14800000 15200000 l 106.302246 老鹰 19 | 15200000 16200000 a 315.355591 20 | 16200000 16600000 w 74.561493 21 | 16600000 16900000 y 58.356392 22 | 16900000 17900000 i 275.071899 23 | 17900000 18300000 N 45.172337 24 | 18300000 18300000 sp -0.076879 sp 25 | 18300000 18900000 h 35.415901 还 26 
| 18900000 19500000 @ 139.136292 27 | 19500000 19800000 y 69.871605 28 | 19800000 19800000 sp -0.076879 sp 29 | 19800000 20800000 S 224.314926 是 30 | 20800000 21300000 % 60.919537 31 | 21300000 21300000 sp -0.076879 sp 32 | 21300000 21600000 y 65.146660 鹦鹉 33 | 21600000 23400000 i 509.238190 34 | 23400000 24200000 N 274.538422 35 | 24200000 24500000 w 71.697403 36 | 24500000 25900000 u 426.615082 37 | 25900000 31000000 sp 1286.679199 sp 38 | 31000000 31600000 w 131.876755 我 39 | 31600000 32600000 > 225.530106 40 | 32600000 32600000 sp -0.076879 sp 41 | 32600000 33200000 g 25.476921 更 42 | 33200000 33900000 & 68.824425 43 | 33900000 34200000 N 51.120422 44 | 34200000 34200000 sp -0.076879 sp 45 | 34200000 35400000 x 225.093246 喜欢 46 | 35400000 36000000 i 58.948406 47 | 36000000 36700000 h 87.253685 48 | 36700000 37100000 w 65.254646 49 | 37100000 38000000 @ 137.905014 50 | 38000000 38600000 n 140.870575 51 | 38600000 38600000 sp -0.076879 sp 52 | 38600000 38900000 y 70.878792 鹦鹉 53 | 38900000 40000000 i 321.959229 54 | 40000000 41900000 N 652.475769 55 | 41900000 42200000 w 85.424362 56 | 42200000 44300000 u 679.561646 57 | 44300000 51600000 sp 2439.279541 sp 58 | . 
59 | -------------------------------------------------------------------------------- /test/test1.txt: -------------------------------------------------------------------------------- 1 | 你 更 喜欢 老鹰 还 是 鹦鹉 我 更 喜欢 鹦鹉 2 | -------------------------------------------------------------------------------- /test/test1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenchenzi/P2FA_Mandarin_py3/57d331f8e218f1c4e4bc19e28064e603b67600fb/test/test1.wav -------------------------------------------------------------------------------- /test/test_16000.align: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/myl_26185.rec" 3 | 0 2500000 sp 974.901001 sp 4 | 2500000 3400000 j 221.419800 经济 5 | 3400000 3800000 i 61.806274 6 | 3800000 4700000 N 107.410378 7 | 4700000 5100000 j 44.406025 8 | 5100000 5600000 i 79.378181 9 | 5600000 5600000 sp -0.076879 sp 10 | 5600000 6400000 z 115.739479 自由 11 | 6400000 7300000 I 179.200134 12 | 7300000 8100000 y 168.535812 13 | 8100000 9600000 o 427.144348 14 | 9600000 9900000 w 28.448975 15 | 9900000 9900000 sp -0.076879 sp 16 | 9900000 10900000 h 145.013290 和 17 | 10900000 12000000 & 261.809082 18 | 12000000 12000000 sp -0.076879 sp 19 | 12000000 13300000 q 271.435516 其他 20 | 13300000 14100000 i 199.070053 21 | 14100000 14600000 t 76.701584 22 | 14600000 15800000 a 274.273590 23 | 15800000 15800000 sp -0.076879 sp 24 | 15800000 16600000 f 158.931122 方面 25 | 16600000 17100000 a 124.600594 26 | 17100000 17400000 N 49.126904 27 | 17400000 17900000 m 100.946121 28 | 17900000 18200000 y 18.591824 29 | 18200000 18600000 E 79.596283 30 | 18600000 18900000 n 33.211811 31 | 18900000 18900000 sp -0.076879 sp 32 | 18900000 19300000 d 61.835182 的 33 | 19300000 19800000 & 99.924820 34 | 19800000 19800000 sp -0.076879 sp 35 | 19800000 20600000 z 146.982819 自由 36 | 20600000 21500000 I 186.389145 37 | 21500000 22500000 y 258.086792 38 | 
22500000 23700000 o 299.957642 39 | 23700000 24300000 w 90.956642 40 | 24300000 27200000 sp 929.874329 sp 41 | 27200000 28300000 m 210.598740 没有 42 | 28300000 28800000 e 131.147903 43 | 28800000 29100000 y 74.190041 44 | 29100000 29500000 y 98.510986 45 | 29500000 30400000 o 225.242065 46 | 30400000 30700000 w 43.950230 47 | 30700000 30700000 sp -0.076879 sp 48 | 30700000 31600000 m 227.060074 明显 49 | 31600000 32500000 i 213.690598 50 | 32500000 33200000 N 88.227432 51 | 33200000 34100000 x 156.143631 52 | 34100000 34600000 y 95.357941 53 | 34600000 35000000 E 89.767639 54 | 35000000 35300000 n 32.689037 55 | 35300000 35300000 sp -0.076879 sp 56 | 35300000 35600000 d 55.840237 的 57 | 35600000 36100000 & 114.320183 58 | 36100000 36100000 sp -0.076879 sp 59 | 36100000 37000000 j 223.540710 界限 60 | 37000000 37300000 y 78.275063 61 | 37300000 38200000 E 311.848236 62 | 38200000 39200000 x 249.330688 63 | 39200000 39500000 y 70.710426 64 | 39500000 40200000 E 170.178116 65 | 40200000 40800000 n 93.546394 66 | 40800000 44600000 sp 1596.034424 sp 67 | . 
68 | -------------------------------------------------------------------------------- /test/test_16000.txt: -------------------------------------------------------------------------------- 1 | 还还还 还还还 经济 自由 和 其他 方面 的 自由 没有 明显 的 界限。 2 | -------------------------------------------------------------------------------- /test/test_16000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenchenzi/P2FA_Mandarin_py3/57d331f8e218f1c4e4bc19e28064e603b67600fb/test/test_16000.wav -------------------------------------------------------------------------------- /test/test_8000.align: -------------------------------------------------------------------------------- 1 | #!MLF!# 2 | "/tmp/myl_24573.rec" 3 | 0 8600000 sp 2655.220215 sp 4 | 8600000 9300000 o 130.016449 噢 5 | 9300000 9300000 sp -0.132768 sp 6 | 9300000 9800000 n 48.759750 那 7 | 9800000 10400000 a 142.300674 8 | 10400000 10400000 sp -0.132768 sp 9 | 10400000 11700000 i 250.983871 一定 10 | 11700000 12000000 d 41.677925 11 | 12000000 12700000 i 109.374626 12 | 12700000 13000000 N 24.200605 13 | 13000000 13000000 sp -0.132768 sp 14 | 13000000 13300000 S 39.259361 是 15 | 13300000 13600000 % 37.615498 16 | 13600000 13600000 sp -0.132768 sp 17 | 13600000 13900000 y 42.209476 有 18 | 13900000 14300000 o 108.595810 19 | 14300000 14700000 w 76.075417 20 | 14700000 14700000 sp -0.132768 sp 21 | 14700000 15000000 i 39.316082 一 22 | 15000000 15000000 sp -0.132768 sp 23 | 15000000 15400000 Z 95.461594 种 24 | 15400000 16500000 o 296.505981 25 | 16500000 18100000 N 454.212799 26 | 18100000 18100000 sp -0.132768 sp 27 | 18100000 19700000 t 323.293182 特别 28 | 19700000 20000000 & 13.081193 29 | 20000000 20400000 b 60.888390 30 | 20400000 21900000 y 256.354950 31 | 21900000 22400000 E 47.157375 32 | 22400000 22900000 sp 94.957649 sp 33 | 22900000 24200000 t 242.286911 特别 34 | 24200000 24600000 & 45.574699 35 | 24600000 24900000 b 47.007507 36 | 24900000 25200000 y 
34.037094 37 | 25200000 27000000 E 380.628143 38 | 27000000 28100000 sp 380.686890 sp 39 | 28100000 28600000 b 113.949867 不 40 | 28600000 29000000 u 39.782421 41 | 29000000 29000000 sp -0.132768 sp 42 | 29000000 29400000 i 68.105476 一样 43 | 29400000 29900000 y 115.556313 44 | 29900000 31100000 a 407.338684 45 | 31100000 31400000 N 61.972153 46 | 31400000 31400000 sp -0.132768 sp 47 | 31400000 31700000 d 30.655909 的 48 | 31700000 32000000 & 49.452694 49 | 32000000 32000000 sp -0.132768 sp 50 | 32000000 32700000 g 87.686951 感受 51 | 32700000 33300000 @ 108.202324 52 | 33300000 33700000 n 50.980083 53 | 33700000 34200000 S 105.961426 54 | 34200000 34700000 o 121.505142 55 | 34700000 35200000 w 121.629051 56 | 35200000 35200000 sp -0.132768 sp 57 | 35200000 35500000 r 86.166107 让 58 | 35500000 36000000 a 171.147919 59 | 36000000 36300000 N 95.012085 60 | 36300000 36300000 sp -0.132768 sp 61 | 36300000 37200000 r 398.568024 人 62 | 37200000 39300000 & 981.832825 63 | 39300000 39600000 n 51.836075 64 | 39600000 45400000 sp 1947.180298 sp 65 | . 66 | -------------------------------------------------------------------------------- /test/test_8000.txt: -------------------------------------------------------------------------------- 1 | 噢 那 一定 是 有 一 种 特别 特别 不 一样 的 感受 让 人 2 | -------------------------------------------------------------------------------- /test/test_8000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenchenzi/P2FA_Mandarin_py3/57d331f8e218f1c4e4bc19e28064e603b67600fb/test/test_8000.wav --------------------------------------------------------------------------------