├── julius4seg
    ├── __init__.py
    ├── sp_remover.py
    └── sp_inserter.py
├── sample
    ├── sample_kan.txt
    ├── result.png
    ├── sample_kana.txt
    ├── sample_voice.wav
    ├── run.sh
    ├── run_remover.py
    ├── README.md
    └── run_segment.py
├── .gitignore
├── LICENSE
├── Dockerfile
└── README.md


/julius4seg/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sample/sample_kan.txt:
--------------------------------------------------------------------------------
1 | また 東寺のように 五大明王と 呼ばれる 主要な 明王の 中央に 配されることも多い
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.dfa
2 | *.dict
3 | .mypy_cache
4 | __pycache__
5 | sample/*.txt
6 | 


--------------------------------------------------------------------------------
/sample/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yamachu/julius4seg/HEAD/sample/result.png


--------------------------------------------------------------------------------
/sample/sample_kana.txt:
--------------------------------------------------------------------------------
1 | マタ トージノヨーニ ゴダイミョウオート ヨバレル シュヨーナ ミョーオーノ チューオーニ ハイサレルコトモオーイ
2 | 


--------------------------------------------------------------------------------
/sample/sample_voice.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yamachu/julius4seg/HEAD/sample/sample_voice.wav


--------------------------------------------------------------------------------
/sample/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | if [ $# -lt 1 ]; then
 5 |     echo 'usage: <sp-remove,sp-segment> args...'
 6 |     echo 'see: https://github.com/yamachu/julius4seg'
 7 |     exit 1
 8 | fi
 9 | 
10 | case "$1" in
11 |     "sp-remove" ) python3 run_remover.py ${@:2:($#-1)} ;;
12 |     "sp-segment" ) python3 run_segment.py ${@:2:($#-1)} ;;
13 |     * ) echo "sp-remove or sp-segment only" ;;
14 | esac
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Yusuke Yamada
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Image that clones the Julius dictation-kit and julius4seg at pinned commits
# and runs sample/run.sh as its entry point.
FROM ubuntu:18.04

# For supporting Japanese in Python3
ENV LC_CTYPE=C.UTF-8

# Install git, git-lfs and Julius dependencies.
# git-lfs is installed from the v2.10.0 release tarball; --skip-smudge keeps
# git from downloading LFS objects automatically, so only the files fetched
# explicitly below are pulled.
RUN apt-get update && \
    apt-get install -y curl git python3 libgomp1 pulseaudio && \
    apt-get clean && \
    curl -sLO https://github.com/git-lfs/git-lfs/releases/download/v2.10.0/git-lfs-linux-amd64-v2.10.0.tar.gz && \
    tar -zxf git-lfs-linux-amd64-v2.10.0.tar.gz git-lfs && \
    mv git-lfs /usr/bin/ && \
    rm git-lfs-linux-amd64-v2.10.0.tar.gz && \
    git lfs install --skip-smudge

# Clone the dictation-kit at a pinned commit and fetch only the LFS objects
# matching model/phone_m/jnas-mono-16mix-gid*.
ARG DICTATION_KIT_HASH="1ceb4dec245ef482918ca33c55c71d383dce145e"
RUN git clone https://github.com/julius-speech/dictation-kit.git /opt/dictation-kit && \
    cd /opt/dictation-kit && \
    git checkout ${DICTATION_KIT_HASH} && \
    git lfs fetch origin --recent -I "model/phone_m/jnas-mono-16mix-gid*" && \
    git lfs checkout origin "model/phone_m/jnas-mono-16mix-gid*"

# Clone julius4seg at a pinned commit and make the dispatcher script executable.
ARG JULIUS4SEG_HASH="d83a954489d4d8ba605982355f6d95724a8121df"
RUN git clone https://github.com/yamachu/julius4seg.git /opt/julius4seg && \
    cd /opt/julius4seg && \
    git checkout ${JULIUS4SEG_HASH} && \
    cd /opt/julius4seg/sample && \
    chmod +x ./run.sh

# run.sh invokes run_remover.py / run_segment.py by relative path,
# so the working directory must be the sample directory.
WORKDIR /opt/julius4seg/sample

ENTRYPOINT ["./run.sh"]
33 | 


--------------------------------------------------------------------------------
/sample/run_remover.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('..')
 3 | 
 4 | from julius4seg import sp_remover
 5 | import array
 6 | import argparse
 7 | import wave
 8 | 
 9 | 
def main(args: argparse.Namespace):
    '''Remove silent sections from a WAV file and write the result as a new WAV.

    args:
        args (argparse.Namespace): parsed command line; reads wav_file,
            input_seg_file, output_wav_file, start, end, edge_only and margin
    '''
    # sp_remover reads its module-level MARGIN when cutting inner silences,
    # so the command-line value is installed there before the call.
    sp_remover.MARGIN = int(args.margin)
    with open(args.input_seg_file) as f:
        label = [s.strip() for s in f]

    sp_label = sp_remover.get_sp_segment(label)

    samples = sp_remover.get_wav_sp_removed(
        args.wav_file, sp_label, args.edge_only, int(args.start), int(args.end))

    # The output header is fixed to mono / 16 bit / 16 kHz.
    with wave.open(args.output_wav_file, 'wb') as f:
        f.setnchannels(1)
        f.setsampwidth(2)
        f.setframerate(16000)
        # array.tostring() was removed in Python 3.9; tobytes() returns the same bytes.
        f.writeframes(array.array('h', samples).tobytes())
24 | 
25 | 
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser('sp remove demo')

    parser.add_argument('wav_file')
    parser.add_argument('input_seg_file')
    parser.add_argument('output_wav_file')
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of letting int() fail with a traceback inside main().
    parser.add_argument('-s', '--start', type=int, default=0, help='発話始点の残す無音区間の量[msec]: -Eの時のみ有効')
    parser.add_argument('-e', '--end', type=int, default=0, help='発話終端の残す無音区間の量[msec]: -Eの時のみ有効')
    parser.add_argument('-E', '--edge-only', action='store_true', help='発話の前後の無音区間のみを行う')
    parser.add_argument('-m', '--margin', type=int, default=5, help='有声区間と無声区間のマージン[msec]')

    args = parser.parse_args()

    main(args)
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # julius4seg
 2 | 
 3 | [Julius Japanese Dictation-kit](https://github.com/julius-speech/dictation-kit)をPythonから叩いている風にするためのスクリプト．
 4 | 
 5 | grammar-kitとsegmentation-kitを足して2で割ったような立ち位置．
 6 | 
 7 | ## Usage
 8 | 
 9 | ### Cloneして使う方
10 | 
11 | see: https://github.com/yamachu/julius4seg/blob/master/sample/README.md
12 | 
13 | ### Dockerで使う方
14 | 
15 | see: https://hub.docker.com/r/yamachu/julius4seg
16 | 
17 | ```sh
18 | $ docker pull yamachu/julius4seg:latest
19 | ```
20 | 
21 | #### segmentationしたい方
22 | 
23 | コマンド例
24 | 
25 | ```sh
26 | $ docker run --rm -v `pwd`/sample:/tmp yamachu/julius4seg sp-segment /tmp/sample_voice.wav /tmp/sample_kana.txt /tmp/seg.txt
27 | ```
28 | 
29 | 第一引数に `sp-segment` を入れて、その後に続く引数は[sample](https://github.com/yamachu/julius4seg/blob/master/sample/README.md)の `run_segment.py` と同様
30 | 
31 | ファイル入力前提で作られているので、ローカルのディレクトリをマウントして、そのファイルを指定するようにして下さい。
32 | 
33 | #### silenceを除去したい方
34 | 
35 | コマンド例
36 | 
37 | ```sh
38 | $ docker run --rm -v `pwd`/sample:/tmp yamachu/julius4seg sp-remove /tmp/sample_voice.wav /tmp/seg.txt /tmp/out.wav
39 | ```
40 | 
41 | 第一引数に `sp-remove` を入れて、その後に続く引数は[sample](https://github.com/yamachu/julius4seg/blob/master/sample/README.md)の `run_remover.py` と同様
42 | 
43 | ## 注意事項
44 | 
45 | このスクリプトを実行するのに依存しているDictation-kitはgit lfsがインストールされていないと音響モデルも一緒にクローンできないため注意．
46 | 
47 | Juliusの標準的なサポートフォーマットである16kHz, 16bit, monoの音声を対象としている．
48 | 
49 | macOS, Python 3.6で動作を確認．
50 | 


--------------------------------------------------------------------------------
/julius4seg/sp_remover.py:
--------------------------------------------------------------------------------
 1 | import array
 2 | import struct
 3 | import wave
 4 | 
 5 | 
 6 | # 有声音素を削らないためのマージン
 7 | MARGIN = 5
 8 | 
 9 | 
def get_sp_segment(time_list: [str]) -> [[int]]:
    '''Pick the silent sections out of a phoneme segment list.

    A line counts as silent when it contains 'silB', 'silE' or 'sp'.

    args:
        time_list ([str]): phoneme segment lines such as '0 71 silB'
    returns:
        [[int]]: the first two fields of each silent line converted to int
            (start frame and end frame)
    '''
    silence_marks = ('silB', 'silE', 'sp')
    segments = []
    for line in time_list:
        if not any(mark in line for mark in silence_marks):
            continue
        segments.append([int(field) for field in line.split()[:2]])
    return segments
19 | 
20 | 
def get_wav_sp_removed(wav_file_name: str, sp_segment: [[int]], only_edge: bool = False, start_margin: int = MARGIN, end_margin: int = MARGIN) -> [int]:
    '''Return the samples of a WAV file with its silent sections cut out.

    Frame numbers in sp_segment are converted to milliseconds by multiplying
    by 10, then to sample positions using the frame rate stored in the file.

    args:
        wav_file_name (str): path of the input WAV file; the data is read as
            one 16-bit sample per frame (i.e. mono, 16 bit)
        sp_segment ([[int]]): [start frame, end frame] of each silent section,
            in file order
        only_edge (bool): when True only the leading and trailing silences are
            trimmed; otherwise every silent section is removed
        start_margin (int): msec of silence kept before the speech
            (used only when only_edge is True)
        end_margin (int): msec of silence kept after the speech
            (used only when only_edge is True)
    returns:
        [int]: the remaining samples
    '''
    with wave.open(wav_file_name) as f:
        n = f.getnframes()
        rate = f.getframerate()
        # WAV samples are little-endian; a counted format avoids building an
        # n-character format string.
        data = struct.unpack('<{}h'.format(n), f.readframes(n))

    def to_sample(msec):
        return int(msec / 1000 * rate)

    removed = []

    if only_edge:
        # Keep start_margin msec before the end of the first silence, unless
        # that would not be a positive offset.
        tmp = sp_segment[0][1] * 10 - start_margin
        seg_start = tmp if tmp > 0 else sp_segment[0][0] * 10

        # Keep end_margin msec after the start of the last silence, but never
        # go past the end of that silence.
        tmp = sp_segment[-1][0] * 10 + end_margin
        seg_end = tmp if tmp < sp_segment[-1][1] * 10 else sp_segment[-1][1] * 10

        removed.extend(data[to_sample(seg_start):to_sample(seg_end)])
        return removed

    seg_start = 0
    last = len(sp_segment) - 1
    for i, seg in enumerate(sp_segment):
        if i > 0:
            # Voiced span between the previous silence and this one,
            # widened by MARGIN msec on both sides.
            seg_end = seg[0] * 10 + MARGIN
            removed.extend(data[to_sample(seg_start):to_sample(seg_end)])
        if i != last:
            # Clamp at 0: a negative offset would be taken as an index
            # counted from the end of the data.
            seg_start = max(seg[1] * 10 - MARGIN, 0)

    return removed
52 | 


--------------------------------------------------------------------------------
/sample/README.md:
--------------------------------------------------------------------------------
  1 | # サンプル
  2 | 
  3 | ## セグメンテーションツール
  4 | 
  5 | ### 実行例
  6 | 
  7 | spを挿入したテキストが必要な場合（spを考慮した言語モデルの作成などの用途）
  8 | 
  9 | ```
 10 | python3 run_segment.py sample_voice.wav -it sample_kan.txt -ot sp_kan.txt sample_kana.txt seg.txt
 11 | ```
 12 | 
 13 | 音素のセグメントのみが必要な場合（主に合成などで無音区間を除去したい場合など）
 14 | 
 15 | ```
 16 | python3 run_segment.py sample_voice.wav sample_kana.txt seg.txt
 17 | ```
 18 | 
 19 | ### 注意事項
 20 | 
 21 | _run_segment.py_ 内の
 22 | ```
 23 | sp_inserter.JULIUS_ROOT = PurePath('/opt/dictation-kit')
 24 | ```
 25 | のPATHを自分の環境に合わせること．
 26 | 
 27 | またJuliusの制約上，長い音声をセグメンテーションしようとした場合，失敗することがあります．
 28 | 
 29 | <details>
 30 | <summary>セグメント結果の例</summary>
 31 | 
 32 | ```
 33 | 0 71 silB
 34 | 72 74 m
 35 | 75 83 a
 36 | 84 91 t
 37 | 92 108 a
 38 | 109 137 sp
 39 | 138 144 t
 40 | 145 164 o:
 41 | 165 170 j
 42 | 171 173 i
 43 | 174 179 n
 44 | 180 185 o
 45 | 186 194 y
 46 | 195 211 o:
 47 | 212 214 n
 48 | 215 230 i
 49 | 231 286 sp
 50 | 287 291 g
 51 | 292 298 o
 52 | 299 304 d
 53 | 305 313 a
 54 | 314 320 i
 55 | 321 337 my
 56 | 338 342 o
 57 | 343 345 u
 58 | 346 356 o:
 59 | 357 362 t
 60 | 363 365 o
 61 | 366 372 y
 62 | 373 375 o
 63 | 376 382 b
 64 | 383 386 a
 65 | 387 389 r
 66 | 390 397 e
 67 | 398 402 r
 68 | 403 420 u
 69 | 421 453 sp
 70 | 454 472 sh
 71 | 473 475 u
 72 | 476 484 y
 73 | 485 500 o:
 74 | 501 503 n
 75 | 504 512 a
 76 | 513 525 my
 77 | 526 532 o:
 78 | 533 552 o:
 79 | 553 557 n
 80 | 558 573 o
 81 | 574 589 sp
 82 | 590 605 ch
 83 | 606 619 u:
 84 | 620 635 o:
 85 | 636 640 n
 86 | 641 645 i
 87 | 646 654 h
 88 | 655 662 a
 89 | 663 666 i
 90 | 667 674 s
 91 | 675 679 a
 92 | 680 682 r
 93 | 683 690 e
 94 | 691 693 r
 95 | 694 696 u
 96 | 697 706 k
 97 | 707 710 o
 98 | 711 715 t
 99 | 716 720 o
100 | 721 729 m
101 | 730 735 o
102 | 736 762 o:
103 | 763 772 i
104 | 773 872 silE
105 | ```
106 | </details>
107 | 
108 | ## 無音除去ツール
109 | 
110 | セグメンテーションツールより得られたセグメンテーションファイルを元にファイルの無音区間を除去する．
111 | 
112 | ### 実行例
113 | 
114 | 全ての無音データを削除する場合
115 | 
116 | ```
117 | python3 run_remover.py sample_voice.wav seg.txt out.wav
118 | ```
119 | 
120 | 音声の先頭と終端の無音区間のトリミングを行う場合（例では500msecに揃える）
121 | 
122 | ```
123 | python3 run_remover.py sample_voice.wav seg.txt out.wav -s 500 -e 500 -E
124 | ```
125 | 
126 | また無音区間と判定された最初のフレームの初めから、また最後のフレームの終わりからn[msec]を削除できる `-m` オプションを使用できる．
127 | 
128 | ![実行結果](https://github.com/yamachu/julius4seg/raw/master/sample/result.png "サンプル")
129 | 
130 | 
131 | ## その他
132 | 
133 | サンプルのテキストは[日本声優統計学会](http://voice-statistics.github.io/)より，[声優統計コーパス 音素バランス文](https://github.com/voice-statistics/voice-statistics.github.com/blob/master/assets/doc/balance_sentences.txt)の001をお借りいたしました．
134 | 
135 | またsample音声は本サンプルの実行以外での使用を禁じます．
136 | 
137 | 


--------------------------------------------------------------------------------
/sample/run_segment.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('..')
  3 | 
  4 | from julius4seg import sp_inserter
  5 | from pathlib import PurePath
  6 | import argparse
  7 | from logging import DEBUG, FileHandler
  8 | 
# Set this to the root directory of your Julius dictation-kit checkout.
# '/opt/dictation-kit' is the location the Dockerfile clones it to.
sp_inserter.JULIUS_ROOT = PurePath('/opt/dictation-kit')

# To write sp_inserter's log output to a file, uncomment the lines below.
# NOTE: `logname` is not defined in this script; define it before use.
# fhandler = FileHandler(logname + '.log')
# fhandler.setLevel(DEBUG)
# sp_inserter.logger.addHandler(fhandler)
 16 | 
 17 | 
 18 | def main(args: dict):
 19 |     utt_id = PurePath(args.wav_file).name.split('.')[0]
 20 | 
 21 |     with open(args.input_kana_file) as f:
 22 |         base_kata_text = f.readline().strip()
 23 | 
 24 |     if args.input_text_file:
 25 |         with open(args.input_text_file) as f:
 26 |             base_kan_text = f.readline().strip().split()
 27 |     else:
 28 |         base_kan_text = ['sym_{}'.format(i) for i in range(len(base_kata_text.split()))]
 29 | 
 30 |     assert len(base_kan_text) == len(base_kata_text.split())
 31 |     
 32 |     julius_phones = [sp_inserter.conv2julius(hira) for hira in [sp_inserter.kata2hira(kata) for kata in base_kata_text.split()]]
 33 |     
 34 |     dict_1st = sp_inserter.gen_julius_dict_1st(base_kan_text, julius_phones)
 35 |     dfa_1st = sp_inserter.gen_julius_dfa(dict_1st.count('\n'))
 36 |     
 37 |     with open('first_pass.dict', 'w') as f:
 38 |         f.write(dict_1st)
 39 |         
 40 |     with open('first_pass.dfa', 'w') as f:
 41 |         f.write(dfa_1st)
 42 |         
 43 |     raw_first_output = sp_inserter.julius_sp_insert(args.wav_file, 'first_pass', args.hmm_model)
 44 |     
 45 |     forced_text_with_sp = []
 46 |     forced_phones_with_sp = []
 47 | 
 48 |     try:
 49 |         _, sp_position = sp_inserter.get_sp_inserted_text(raw_first_output, utt_id)
 50 |         
 51 |         for j, zipped in enumerate(zip(base_kan_text, julius_phones)):
 52 |             forced_text_with_sp.append(zipped[0])
 53 |             forced_phones_with_sp.append(zipped[1])
 54 |             if j in sp_position:
 55 |                 forced_text_with_sp.append('<sp>')
 56 |                 forced_phones_with_sp.append('sp')
 57 |                 
 58 |         forced_text_with_sp = ' '.join(forced_text_with_sp)
 59 |         forced_phones_with_sp = ' '.join(forced_phones_with_sp)
 60 |     except:
 61 |         pass
 62 |     
 63 |     phones_with_sp = sp_inserter.get_sp_inserterd_phone_seqence(raw_first_output, utt_id)
 64 |     
 65 |     if len(forced_phones_with_sp) < 2:
 66 |         forced_phones_with_sp = phones_with_sp
 67 | 
 68 |     dict_2nd = sp_inserter.gen_julius_dict_2nd(forced_phones_with_sp)
 69 |     dfa_2nd = sp_inserter.gen_julius_aliment_dfa()
 70 | 
 71 |     with open('second_pass.dict', 'w') as f:
 72 |         f.write(dict_2nd)
 73 | 
 74 |     with open('second_pass.dfa', 'w') as f:
 75 |         f.write(dfa_2nd)
 76 | 
 77 |     raw_second_output = sp_inserter.julius_phone_alignment(args.wav_file, 'second_pass', args.hmm_model)
 78 | 
 79 |     time_alimented_list = sp_inserter.get_time_alimented_list(raw_second_output)
 80 | 
 81 |     if args.output_text_file:
 82 |         with open(args.output_text_file, 'w') as f:
 83 |             f.write(forced_text_with_sp + '\n')
 84 | 
 85 |     with open(args.output_seg_file, 'w') as f:
 86 |         for ss in time_alimented_list:
 87 |             f.write(' '.join(list(ss)) + '\n')
 88 | 
 89 | 
 90 | if __name__ == '__main__':
 91 |     parser = argparse.ArgumentParser('sp insert demo by Julius')
 92 |     
 93 |     parser.add_argument('wav_file', help='入力音声')
 94 |     parser.add_argument('input_kana_file', help='スペース区切りのカナ読みファイル')
 95 |     parser.add_argument('output_seg_file', help='時間情報付き音素セグメントファイル')
 96 | 
 97 |     parser.add_argument('-it','--input_text_file', help='漢字仮名交じり文')
 98 |     parser.add_argument('-ot', '--output_text_file', help='漢字仮名交じり文にspを挿入したもの')
 99 |     
100 |     parser.add_argument('--hmm_model', help='support mono-phone model only')
101 | 
102 |     args = parser.parse_args()
103 | 
104 |     main(args)
105 | 


--------------------------------------------------------------------------------
/julius4seg/sp_inserter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import sys
  4 | import subprocess
  5 | from itertools import chain
  6 | from pathlib import Path, PurePath
  7 | 
  8 | from logging import getLogger, DEBUG, NullHandler
# Module-level logger. It gets a NullHandler and does not propagate to the
# root logger, so nothing is emitted unless a caller attaches its own handler.
logger = getLogger(__name__)
logger.addHandler(NullHandler())
logger.setLevel(DEBUG)
logger.propagate = False


# MUST CHANGE: root directory of the Julius dictation-kit; the paths of the
# acoustic model and the Julius binary are built relative to it.
JULIUS_ROOT = PurePath('.')
 17 | 
 18 | 
 19 | def get_os_dependent_directory() -> str:
 20 |     '''Juluis Segmentaion-Kitのディレクトリ名をOSの種類から取得
 21 |     returns:
 22 |         (str): OS依存のパスの一部
 23 |     '''
 24 |     if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
 25 |         return 'windows'
 26 |     elif sys.platform.startswith('darwin'):
 27 |         return 'osx'
 28 |     elif sys.platform.startswith('linux'):
 29 |         return  'linux'
 30 | 
 31 | 
 32 | def get_os_dependent_exec() -> str:
 33 |     '''Juliusの実行ファイル名を取得
 34 |     returns:
 35 |         (str): Juliusの実行ファイル名
 36 |     '''
 37 |     if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
 38 |         return 'julius.exe'
 39 |     else:
 40 |         return 'julius'
 41 | 
 42 | 
 43 | def kata2hira(kana: str) -> str:
 44 |     '''ヴ，ヵ，ヶ以外のカタカナをひらがなに変換
 45 |     args:
 46 |         kana(str): カタカナ文字列
 47 |             "ヤキニク"
 48 |     returns:
 49 |         (str): ひらがな文字列
 50 |             "やきにく"
 51 |     '''
 52 |     return ''.join([chr(ord(c) + ord('あ') - ord('ア')) if c != 'ー' else 'ー' for c in kana])
 53 | 
 54 | 
 55 | def conv2julius(s: str) -> str:
 56 |     '''入力の単語の読み（ひらがな）をJuliusの音素列に変換
 57 |     args:
 58 |         kana(str): カタカナ文字列
 59 |             "やきにく"
 60 |     returns:
 61 |         (str): ひらがな文字列
 62 |             " y a k i n i k u"
 63 |     '''
 64 |     s = s.replace('あぁ',' a a')
 65 |     s = s.replace('いぃ',' i i')
 66 |     s = s.replace('いぇ',' i e')
 67 |     s = s.replace('いゃ',' y a')
 68 |     s = s.replace('うぅ',' u:')
 69 |     s = s.replace('えぇ',' e e')
 70 |     s = s.replace('おぉ',' o:')
 71 |     s = s.replace('かぁ',' k a:')
 72 |     s = s.replace('きぃ',' k i:')
 73 |     s = s.replace('くぅ',' k u:')
 74 |     s = s.replace('くゃ',' ky a')
 75 |     s = s.replace('くゅ',' ky u')
 76 |     s = s.replace('くょ',' ky o')
 77 |     s = s.replace('けぇ',' k e:')
 78 |     s = s.replace('こぉ',' k o:')
 79 |     s = s.replace('がぁ',' g a:')
 80 |     s = s.replace('ぎぃ',' g i:')
 81 |     s = s.replace('ぐぅ',' g u:')
 82 |     s = s.replace('ぐゃ',' gy a')
 83 |     s = s.replace('ぐゅ',' gy u')
 84 |     s = s.replace('ぐょ',' gy o')
 85 |     s = s.replace('げぇ',' g e:')
 86 |     s = s.replace('ごぉ',' g o:')
 87 |     s = s.replace('さぁ',' s a:')
 88 |     s = s.replace('しぃ',' sh i:')
 89 |     s = s.replace('すぅ',' s u:')
 90 |     s = s.replace('すゃ',' sh a')
 91 |     s = s.replace('すゅ',' sh u')
 92 |     s = s.replace('すょ',' sh o')
 93 |     s = s.replace('せぇ',' s e:')
 94 |     s = s.replace('そぉ',' s o:')
 95 |     s = s.replace('ざぁ',' z a:')
 96 |     s = s.replace('じぃ',' j i:')
 97 |     s = s.replace('ずぅ',' z u:')
 98 |     s = s.replace('ずゃ',' zy a')
 99 |     s = s.replace('ずゅ',' zy u')
100 |     s = s.replace('ずょ',' zy o')
101 |     s = s.replace('ぜぇ',' z e:')
102 |     s = s.replace('ぞぉ',' z o:')
103 |     s = s.replace('たぁ',' t a:')
104 |     s = s.replace('ちぃ',' ch i:')
105 |     s = s.replace('つぁ',' ts a')
106 |     s = s.replace('つぃ',' ts i')
107 |     s = s.replace('つぅ',' ts u:')
108 |     s = s.replace('つゃ',' ch a')
109 |     s = s.replace('つゅ',' ch u')
110 |     s = s.replace('つょ',' ch o')
111 |     s = s.replace('つぇ',' ts e')
112 |     s = s.replace('つぉ',' ts o')
113 |     s = s.replace('てぇ',' t e:')
114 |     s = s.replace('とぉ',' t o:')
115 |     s = s.replace('だぁ',' d a:')
116 |     s = s.replace('ぢぃ',' j i:')
117 |     s = s.replace('づぅ',' d u:')
118 |     s = s.replace('づゃ',' zy a')
119 |     s = s.replace('づゅ',' zy u')
120 |     s = s.replace('づょ',' zy o')
121 |     s = s.replace('でぇ',' d e:')
122 |     s = s.replace('どぉ',' d o:')
123 |     s = s.replace('なぁ',' n a:')
124 |     s = s.replace('にぃ',' n i:')
125 |     s = s.replace('ぬぅ',' n u:')
126 |     s = s.replace('ぬゃ',' ny a')
127 |     s = s.replace('ぬゅ',' ny u')
128 |     s = s.replace('ぬょ',' ny o')
129 |     s = s.replace('ねぇ',' n e:')
130 |     s = s.replace('のぉ',' n o:')
131 |     s = s.replace('はぁ',' h a:')
132 |     s = s.replace('ひぃ',' h i:')
133 |     s = s.replace('ふぅ',' f u:')
134 |     s = s.replace('ふゃ',' hy a')
135 |     s = s.replace('ふゅ',' hy u')
136 |     s = s.replace('ふょ',' hy o')
137 |     s = s.replace('へぇ',' h e:')
138 |     s = s.replace('ほぉ',' h o:')
139 |     s = s.replace('ばぁ',' b a:')
140 |     s = s.replace('びぃ',' b i:')
141 |     s = s.replace('ぶぅ',' b u:')
142 |     s = s.replace('ふゃ',' hy a')
143 |     s = s.replace('ぶゅ',' by u')
144 |     s = s.replace('ふょ',' hy o')
145 |     s = s.replace('べぇ',' b e:')
146 |     s = s.replace('ぼぉ',' b o:')
147 |     s = s.replace('ぱぁ',' p a:')
148 |     s = s.replace('ぴぃ',' p i:')
149 |     s = s.replace('ぷぅ',' p u:')
150 |     s = s.replace('ぷゃ',' py a')
151 |     s = s.replace('ぷゅ',' py u')
152 |     s = s.replace('ぷょ',' py o')
153 |     s = s.replace('ぺぇ',' p e:')
154 |     s = s.replace('ぽぉ',' p o:')
155 |     s = s.replace('まぁ',' m a:')
156 |     s = s.replace('みぃ',' m i:')
157 |     s = s.replace('むぅ',' m u:')
158 |     s = s.replace('むゃ',' my a')
159 |     s = s.replace('むゅ',' my u')
160 |     s = s.replace('むょ',' my o')
161 |     s = s.replace('めぇ',' m e:')
162 |     s = s.replace('もぉ',' m o:')
163 |     s = s.replace('やぁ',' y a:')
164 |     s = s.replace('ゆぅ',' y u:')
165 |     s = s.replace('ゆゃ',' y a:')
166 |     s = s.replace('ゆゅ',' y u:')
167 |     s = s.replace('ゆょ',' y o:')
168 |     s = s.replace('よぉ',' y o:')
169 |     s = s.replace('らぁ',' r a:')
170 |     s = s.replace('りぃ',' r i:')
171 |     s = s.replace('るぅ',' r u:')
172 |     s = s.replace('るゃ',' ry a')
173 |     s = s.replace('るゅ',' ry u')
174 |     s = s.replace('るょ',' ry o')
175 |     s = s.replace('れぇ',' r e:')
176 |     s = s.replace('ろぉ',' r o:')
177 |     s = s.replace('わぁ',' w a:')
178 |     s = s.replace('をぉ',' o:')
179 | 
180 |     s = s.replace('ゔ',' b u')
181 |     s = s.replace('でぃ',' d i')
182 |     s = s.replace('でぇ',' d e:')
183 |     s = s.replace('でゃ',' dy a')
184 |     s = s.replace('でゅ',' dy u')
185 |     s = s.replace('でょ',' dy o')
186 |     s = s.replace('てぃ',' t i')
187 |     s = s.replace('てぇ',' t e:')
188 |     s = s.replace('てゃ',' ty a')
189 |     s = s.replace('てゅ',' ty u')
190 |     s = s.replace('てょ',' ty o')
191 |     s = s.replace('すぃ',' s i')
192 |     s = s.replace('ずぁ',' z u a')
193 |     s = s.replace('ずぃ',' z i')
194 |     s = s.replace('ずぅ',' z u')
195 |     s = s.replace('ずゃ',' zy a')
196 |     s = s.replace('ずゅ',' zy u')
197 |     s = s.replace('ずょ',' zy o')
198 |     s = s.replace('ずぇ',' z e')
199 |     s = s.replace('ずぉ',' z o')
200 |     s = s.replace('きゃ',' ky a')
201 |     s = s.replace('きゅ',' ky u')
202 |     s = s.replace('きょ',' ky o')
203 |     s = s.replace('しゃ',' sh a')
204 |     s = s.replace('しゅ',' sh u')
205 |     s = s.replace('しぇ',' sh e')
206 |     s = s.replace('しょ',' sh o')
207 |     s = s.replace('ちゃ',' ch a')
208 |     s = s.replace('ちゅ',' ch u')
209 |     s = s.replace('ちぇ',' ch e')
210 |     s = s.replace('ちょ',' ch o')
211 |     s = s.replace('とぅ',' t u')
212 |     s = s.replace('とゃ',' ty a')
213 |     s = s.replace('とゅ',' ty u')
214 |     s = s.replace('とょ',' ty o')
215 |     s = s.replace('どぁ',' d o a')
216 |     s = s.replace('どぅ',' d u')
217 |     s = s.replace('どゃ',' dy a')
218 |     s = s.replace('どゅ',' dy u')
219 |     s = s.replace('どょ',' dy o')
220 |     s = s.replace('どぉ',' d o:')
221 |     s = s.replace('にゃ',' ny a')
222 |     s = s.replace('にゅ',' ny u')
223 |     s = s.replace('にょ',' ny o')
224 |     s = s.replace('ひゃ',' hy a')
225 |     s = s.replace('ひゅ',' hy u')
226 |     s = s.replace('ひょ',' hy o')
227 |     s = s.replace('みゃ',' my a')
228 |     s = s.replace('みゅ',' my u')
229 |     s = s.replace('みょ',' my o')
230 |     s = s.replace('りゃ',' ry a')
231 |     s = s.replace('りゅ',' ry u')
232 |     s = s.replace('りょ',' ry o')
233 |     s = s.replace('ぎゃ',' gy a')
234 |     s = s.replace('ぎゅ',' gy u')
235 |     s = s.replace('ぎょ',' gy o')
236 |     s = s.replace('ぢぇ',' j e')
237 |     s = s.replace('ぢゃ',' j a')
238 |     s = s.replace('ぢゅ',' j u')
239 |     s = s.replace('ぢょ',' j o')
240 |     s = s.replace('じぇ',' j e')
241 |     s = s.replace('じゃ',' j a')
242 |     s = s.replace('じゅ',' j u')
243 |     s = s.replace('じょ',' j o')
244 |     s = s.replace('びゃ',' by a')
245 |     s = s.replace('びゅ',' by u')
246 |     s = s.replace('びょ',' by o')
247 |     s = s.replace('ぴゃ',' py a')
248 |     s = s.replace('ぴゅ',' py u')
249 |     s = s.replace('ぴょ',' py o')
250 |     s = s.replace('うぁ',' u a')
251 |     s = s.replace('うぃ',' w i')
252 |     s = s.replace('うぇ',' w e')
253 |     s = s.replace('うぉ',' w o')
254 |     s = s.replace('ふぁ',' f a')
255 |     s = s.replace('ふぃ',' f i')
256 |     s = s.replace('ふぅ',' f u')
257 |     s = s.replace('ふゃ',' hy a')
258 |     s = s.replace('ふゅ',' hy u')
259 |     s = s.replace('ふょ',' hy o')
260 |     s = s.replace('ふぇ',' f e')
261 |     s = s.replace('ふぉ',' f o')
262 | 
263 |     # 1音からなる変換規則
264 |     s = s.replace('あ',' a')
265 |     s = s.replace('い',' i')
266 |     s = s.replace('う',' u')
267 |     s = s.replace('え',' e')
268 |     s = s.replace('お',' o')
269 |     s = s.replace('か',' k a')
270 |     s = s.replace('き',' k i')
271 |     s = s.replace('く',' k u')
272 |     s = s.replace('け',' k e')
273 |     s = s.replace('こ',' k o')
274 |     s = s.replace('さ',' s a')
275 |     s = s.replace('し',' sh i')
276 |     s = s.replace('す',' s u')
277 |     s = s.replace('せ',' s e')
278 |     s = s.replace('そ',' s o')
279 |     s = s.replace('た',' t a')
280 |     s = s.replace('ち',' ch i')
281 |     s = s.replace('つ',' ts u')
282 |     s = s.replace('て',' t e')
283 |     s = s.replace('と',' t o')
284 |     s = s.replace('な',' n a')
285 |     s = s.replace('に',' n i')
286 |     s = s.replace('ぬ',' n u')
287 |     s = s.replace('ね',' n e')
288 |     s = s.replace('の',' n o')
289 |     s = s.replace('は',' h a')
290 |     s = s.replace('ひ',' h i')
291 |     s = s.replace('ふ',' f u')
292 |     s = s.replace('へ',' h e')
293 |     s = s.replace('ほ',' h o')
294 |     s = s.replace('ま',' m a')
295 |     s = s.replace('み',' m i')
296 |     s = s.replace('む',' m u')
297 |     s = s.replace('め',' m e')
298 |     s = s.replace('も',' m o')
299 |     s = s.replace('ら',' r a')
300 |     s = s.replace('り',' r i')
301 |     s = s.replace('る',' r u')
302 |     s = s.replace('れ',' r e')
303 |     s = s.replace('ろ',' r o')
304 |     s = s.replace('が',' g a')
305 |     s = s.replace('ぎ',' g i')
306 |     s = s.replace('ぐ',' g u')
307 |     s = s.replace('げ',' g e')
308 |     s = s.replace('ご',' g o')
309 |     s = s.replace('ざ',' z a')
310 |     s = s.replace('じ',' j i')
311 |     s = s.replace('ず',' z u')
312 |     s = s.replace('ぜ',' z e')
313 |     s = s.replace('ぞ',' z o')
314 |     s = s.replace('だ',' d a')
315 |     s = s.replace('ぢ',' j i')
316 |     s = s.replace('づ',' z u')
317 |     s = s.replace('で',' d e')
318 |     s = s.replace('ど',' d o')
319 |     s = s.replace('ば',' b a')
320 |     s = s.replace('び',' b i')
321 |     s = s.replace('ぶ',' b u')
322 |     s = s.replace('べ',' b e')
323 |     s = s.replace('ぼ',' b o')
324 |     s = s.replace('ぱ',' p a')
325 |     s = s.replace('ぴ',' p i')
326 |     s = s.replace('ぷ',' p u')
327 |     s = s.replace('ぺ',' p e')
328 |     s = s.replace('ぽ',' p o')
329 |     s = s.replace('や',' y a')
330 |     s = s.replace('ゆ',' y u')
331 |     s = s.replace('よ',' y o')
332 |     s = s.replace('わ',' w a')
333 |     s = s.replace('を',' o')
334 |     s = s.replace('ん',' N')
335 |     s = s.replace('っ',' q')
336 |     s = s.replace('ー',':')
337 | 
338 |     s = s.replace('ぁ',' a')
339 |     s = s.replace('ぃ',' i')
340 |     s = s.replace('ぅ',' u')
341 |     s = s.replace('ぇ',' e')
342 |     s = s.replace('ぉ',' o')
343 |     s = s.replace('ゎ',' w a')
344 | 
345 |     s = s[1:]
346 | 
347 |     s = re.sub(r':+', ':', s)
348 | 
349 |     return s
350 | 
351 | 
def gen_julius_dict_1st(text_symbols: [str], word_phones: [str]) -> str:
    '''Build the contents of a Julius dict file from word symbols and their phonemes.

    Words get the even ids 0, 2, 4, ...; an 'sp' entry with the odd id in
    between follows every word except the last. The start symbol '<s>' (silB)
    and end symbol '</s>' (silE) take the next two ids after the last word.

    args:
        text_symbols ([str]): word symbols
            ['今回', 'は']
        word_phones ([str]): phoneme sequence of each word
            ['k o N k a i', 'w a']
    returns:
        (str): contents of the Julius dict file
    '''
    entry = '{}\t[{}]\t{}'
    last_index = len(text_symbols) - 1
    lines = []

    for idx, (symbol, phones) in enumerate(zip(text_symbols, word_phones)):
        lines.append(entry.format(idx * 2, symbol, phones))
        if idx != last_index:
            lines.append(entry.format(idx * 2 + 1, 'sp_{}'.format(idx), 'sp'))

    # append sp and Start, End symbol
    lines.append(entry.format(idx * 2 + 1, '<s>', 'silB'))
    lines.append(entry.format((idx + 1) * 2, '</s>', 'silE'))

    return ''.join(line + '\n' for line in lines)
375 | 
376 | 
def gen_julius_dfa(number_of_words: int) -> str:
    '''Build the contents of the Julius dfa file that chains all dict entries.

    State 0 consumes the last dict entry (id number_of_words - 1), the next
    states consume ids number_of_words - 3 down to 0, and the last transition
    consumes id number_of_words - 2. With the layout written by
    gen_julius_dict_1st those two special ids are '</s>' and '<s>'.

    Each transition line is 'state id next_state 0 flag', where flag is 1 on
    the first line only; the final line is 'state -1 -1 1 0'.

    args:
        number_of_words (int): number of entries in the dict file (at least 3)
    returns:
        (str): contents of the Julius dfa file
    raises:
        ValueError: number_of_words is smaller than 3 (the previous loop-based
            implementation never terminated for such input)
    '''
    if number_of_words < 3:
        raise ValueError(
            'number_of_words must be at least 3, got {}'.format(number_of_words))

    word_order = [
        number_of_words - 1,
        *range(number_of_words - 3, -1, -1),
        number_of_words - 2,
    ]

    lines = [
        '{} {} {} {} {}'.format(state, word, state + 1, 0, 1 if state == 0 else 0)
        for state, word in enumerate(word_order)
    ]
    lines.append('{} {} {} {} {}'.format(len(word_order), -1, -1, 1, 0))

    return '\n'.join(lines) + '\n'
403 | 
404 | 
def gen_julius_dict_2nd(phone_seqence: str) -> str:
    '''Generate the contents of the dict file used for forced alignment.

    Three entries are written: ``w_0`` (silB), ``w_1`` (the given phoneme
    sequence) and ``w_2`` (silE), with ids 0, 1 and 2.

    args:
        phone_seqence (str): phoneme sequence of the whole utterance
            'k o N k a i w a '
    returns:
        (str): contents of the Julius dict file
    '''
    pronunciations = ('silB', phone_seqence, 'silE')
    return ''.join(
        '{0}\t[w_{0}]\t{1}\n'.format(word_id, phones)
        for word_id, phones in enumerate(pronunciations)
    )
418 | 
419 | 
def gen_julius_aliment_dfa() -> str:
    '''Generate the contents of the dfa file used for forced alignment.

    The result is a fixed four-line table: ids 2, 1 and 0 are consumed in
    that order, followed by the ``-1 -1 1 0`` terminator line.

    returns:
        (str): contents of the Julius dfa file
    '''
    transitions = (
        (0, 2, 1, 0, 1),
        (1, 1, 2, 0, 0),
        (2, 0, 3, 0, 0),
        (3, -1, -1, 1, 0),
    )
    return ''.join(' '.join(map(str, fields)) + '\n' for fields in transitions)
431 | 
432 | 
def julius_sp_insert(target_wav_file: str, aliment_file_signiture: str, model_path: str = None) -> [str]:
    '''Run Julius with `-debug` on one wav file and return its stdout lines.

    The wav file name is written to Julius' standard input (`-input file`).
    The command is passed as an argument list, so paths containing whitespace
    stay intact, and no helper `echo` process is spawned.

    args:
        target_wav_file (str): path of the wav file to decode
        aliment_file_signiture (str): value given to the `-gram` option
        model_path (str): value given to the `-h` option; when None the
            bundled model below JULIUS_ROOT is used
    returns:
        ([str]): stdout of Julius decoded as UTF-8 and split on newlines
    '''
    import os  # local import: only needed to encode the file name for stdin

    if model_path is None:
        model_path = JULIUS_ROOT / 'model' / 'phone_m' / 'jnas-mono-16mix-gid.binhmm'

    command = [
        str(JULIUS_ROOT / 'bin' / get_os_dependent_directory() / get_os_dependent_exec()),
        '-h', str(model_path),
        '-input', 'file',
        '-debug',
        '-gram', str(aliment_file_signiture),
    ]

    julius_p = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # Same bytes `echo <file>` used to send: the file name plus a newline.
    stdout, _ = julius_p.communicate(input=os.fsencode(target_wav_file) + b'\n')
    return stdout.decode('utf-8').split('\n')
448 | 
449 | 
def get_sp_inserted_text(raw_output: [str], debug_symbol='') -> (str, [int]):
    '''Extract the sp-inserted text and the sp indices from the decode result.

    The first line starting with ``pass1_best`` is used. The text between
    ``<s>`` and ``</s>`` is taken, every ``sp_<number>`` in it is replaced by
    ``<sp>``, and the numbers are collected in order of appearance.

    args:
        raw_output ([str]): output lines of `julius_sp_insert`
        debug_symbol (str): label included in the warning when decoding fails
    returns:
        Tuple(str, [int]): decoded text with `<sp>` markers and the sp indices
    raises:
        Exception: "Decode Failed" when there is no `pass1_best` line or it
            does not contain `<s> ... </s>`
    '''
    pass1_best = next((s for s in raw_output if s.startswith('pass1_best')), None)
    matched = None
    if pass1_best is not None:
        matched = re.search('<s> (.*) </s>', pass1_best)
    if matched is None:
        logger.warning('Failed Decoding Text [{}]'.format(debug_symbol))
        raise Exception("Decode Failed")

    # `\d+` (not the character class `[\d+]`) so multi-digit indices such as
    # sp_10 are replaced as a whole instead of leaving a trailing digit.
    sp_re = re.compile(r'sp_(\d+)')
    decoded = matched.group(1)
    return (sp_re.sub('<sp>', decoded), [int(index) for index in sp_re.findall(decoded)])
465 | 
466 | 
def get_sp_inserterd_phone_seqence(raw_output: [str], debug_symbol='') -> str:
    '''Extract the phoneme sequence (including inserted sp) from the decode result.

    The first line starting with ``pass1_best_phonemeseq`` is used. The
    expected layout is ``silB | ... | silE``; two fallback layouts
    (``silE | ... | silB`` and ``silE | ...``) are tried next and reported
    through an info log. The ``|`` separators are removed from the result.

    args:
        raw_output ([str]): output lines of `julius_sp_insert`
        debug_symbol (str): label included in log messages
    returns:
        (str): phonemes joined by single spaces
    raises:
        Exception: "Decode Failed" when the line is missing or matches none
            of the layouts
    '''
    pass1_best_phonemeseq = next(
        (s for s in raw_output if s.startswith('pass1_best_phonemeseq')), None)

    # (pattern, is_fallback) in priority order; raw strings keep `\|` a
    # literal-pipe regex escape instead of an invalid string escape.
    candidates = (
        (re.compile(r'silB \| (.*) \| silE'), False),
        (re.compile(r'silE \| (.*) \| silB'), True),
        (re.compile(r'silE \| (.*)'), True),
    )

    matched = None
    if pass1_best_phonemeseq is not None:
        for pattern, is_fallback in candidates:
            matched = pattern.search(pass1_best_phonemeseq)
            if matched is None:
                continue
            if is_fallback:
                logger.info('Use not correct re to generate Phoneseq [{}]'.format(debug_symbol))
            break

    if matched is None:
        logger.warning('Failed Generate Phoneseq [{}]'.format(debug_symbol))
        raise Exception("Decode Failed")

    return ' '.join(part.strip() for part in matched.group(1).split('|'))
488 | 
489 | 
def julius_phone_alignment(target_wav_file: str, aliment_file_signiture: str, model_path: str = None) -> [str]:
    '''Run Julius with `-palign` on one wav file and return its stdout lines.

    The wav file name is written to Julius' standard input (`-input file`).
    The command is passed as an argument list, so paths containing whitespace
    stay intact, and no helper `echo` process is spawned.

    args:
        target_wav_file (str): path of the wav file to align
        aliment_file_signiture (str): value given to the `-gram` option
        model_path (str): value given to the `-h` option; when None the
            bundled model below JULIUS_ROOT is used
    returns:
        ([str]): stdout of Julius decoded as UTF-8 and split on newlines
    '''
    import os  # local import: only needed to encode the file name for stdin

    if model_path is None:
        model_path = JULIUS_ROOT / 'model' / 'phone_m' / 'jnas-mono-16mix-gid.binhmm'

    command = [
        str(JULIUS_ROOT / 'bin' / get_os_dependent_directory() / get_os_dependent_exec()),
        '-h', str(model_path),
        '-palign',
        '-input', 'file',
        '-gram', str(aliment_file_signiture),
    ]

    julius_p = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # Same bytes `echo <file>` used to send: the file name plus a newline.
    stdout, _ = julius_p.communicate(input=os.fsencode(target_wav_file) + b'\n')
    return stdout.decode('utf-8').split('\n')
505 | 
506 | 
def get_time_alimented_list(raw_output: [str]) -> [(str, str, str)]:
    '''Collect the alignment rows found in the Julius output lines.

    A line is picked up when it ends with ``[ <int> <int> ] <number> <label>``;
    lines that do not match are ignored.
    NOTE(review): presumably the two integers are the begin/end frames and the
    label is the phoneme printed by `-palign` -- confirm against Julius output.

    args:
        raw_output ([str]): output lines of `julius_phone_alignment`
    returns:
        ([(str, str, str)]): (first integer, second integer, label) per
            matching line, all kept as strings
    '''
    # Raw string: the pattern is full of regex escapes (\[, \s, \d, ...) that
    # are invalid string escapes in a normal literal.
    row_re = re.compile(r'\[\s*(\d+)\s+(\d+)\s*\]\s*[\-]*[\d,\.]+\s*([\w,\:]+)$')

    matches = (row_re.search(line) for line in raw_output)
    return [m.groups() for m in matches if m is not None]
514 | 


--------------------------------------------------------------------------------