├── LICENSE
├── README.md
├── data_load.py
├── g2p.py
├── graph2pron_statistics.md
├── hyperparams.py
├── ko.txt
├── modules.py
├── networks.py
├── prepo.py
├── rulebook.txt
├── synthesize.py
├── train.py
└── utils.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Korean TTS Model: what is the best Hangul processing strategy for Korean speech synthesis?
 2 | 
 3 | Hangul is a unique script designed mostly for Korean. It is phonetic in principle like Latin letters, but you need to know much more pronunciation rules in order to pronounce it correctly than you do for German or Spanish. Hangul is syllable-based like Kana,
 4 | the Japanese script, but Hangul is also different from Kana in that Hangul syllables can be decomposed
 5 | into their constitutional consonants and vowels.
 6 | Putting together, these are quite handy for readability in practice, but often they embarrass Korean computational linguists.
 7 | Do I have to convert graphmes into phonemes first? Is it better to decompose Hangul syllables for TTS?
 8 |  Or do I have to take syllables without decomposition?
 9 |  If you know the scene behind the Hangul unicode, you will find things are even
10 |  more complicated. There are two kinds of unicode blocks for contemporary Hangul consonants and vowels (called __jamo__ in Korean): Hangul Jamo (0x01100-0x011FF) and
11 |  Hangul Compatibility Jamo (0x03130-0x0318F). In Hangul Compatibility Jamo the first consonant (onset) and the final consonant (code) are given the same unicode point,
12 |  wherase in Hangul Jamo they are treated as independent letters. (Figuratively, if you follow the Hangul Jamo system in English, you have to distinguish the two l's in law and cool)
13 | On the other hand, those two regard consonant clusters such as ㄲ, ㄱㅅ as a single letter. Some claim that they should be understood as a sequence of single consonants. Are they right in the computational practice? These questions motivate this project.
14 | 
15 | I run four different experiements depending on the Hangul processing strategies below.
16 | 
17 | * Exp.0: Hangul Jamo (0x01100-0x011FF) with consonant clusters. Graphemes are converted into phonemes.
18 | * Exp.1: Hangul Jamo (0x01100-0x011FF) with consonant clusters.
19 | * Exp.2: Hangul Compatibility Jamo (0x03130-0x0318F) with consonant clusters
20 | * Exp.3: Hangul Jamo (0x01100-0x011FF). Single consonants only.
21 | * Exp.4: Hangul Compatibility Jamo (0x03130-0x0318F). Single consonants only.
22 | 
23 | ## Requirements
24 |   * python >= 2.7
25 |   * NumPy >= 1.11.1
26 |   * TensorFlow >= 1.3
27 |   * librosa
28 |   * tqdm
29 |   * matplotlib
30 |   * scipy
31 | 
32 | ## Data
33 | 
34 | [KSS Dataset](https://www.kaggle.com/bryanpark/korean-single-speaker-speech-dataset/version/2), a Korean single speaker speech dataset, is used.
35 | 
36 | ## Model
37 | DCTTS, introudced in [Efficiently Trainable Text-to-Speech System Based on Deep Convolutional Networks with Guided Attention](https://arxiv.org/abs/1710.08969), is implemented for this project.
38 | You can refer to my other repo to see the original implementation. This repo focuses on the comparison among the four different experiment conditions.
39 | 
40 | ## Training
41 |   * STEP 0. Download [KSS Dataset](https://www.kaggle.com/bryanpark/korean-single-speaker-speech-dataset).
42 |   * STEP 1. Adjust `num_exp` in `hyperparams.py`.
43 |   * STEP 2. Run `python prepro.py` for model inputs and targets.
44 |   * STEP 3. Run `python train.py 1` for training Text2Mel.
45 |   * STEP 4. Run `python train.py 2` for training SSRN.
46 | 
47 | You can do STEP 3 and 4 at the same time, if you have more than one gpu card.
48 | 
49 | 
50 | ## Sample Synthesis
51 |   * Run `synthesize.py` and check the files in `samples`.
52 | 
53 | ## Generated Samples
54 | 
55 | | Num Experiment       | Samples |
56 | | :----- |:-------------|
57 | | 0      | [400k](https://soundcloud.com/kyubyong-park/sets/kss_exp0)|
58 | | 1      | [400k](https://soundcloud.com/kyubyong-park/sets/kss_exp1)|
59 | | 2      | [400k](https://soundcloud.com/kyubyong-park/sets/kss_exp2)|
60 | | 3| [400k](https://soundcloud.com/kyubyong-park/sets/kss_ex3)|
61 | |4 | [400k](https://soundcloud.com/kyubyong-park/sets/kss_exp4)|
62 | 
63 | ## Pretrained Models
64 | 
65 | | Num Experiment       | Models |
66 | | :----- |:-------------|
67 | | 0      | [400k](https://www.dropbox.com/s/ipt17hoo4lj56xg/exp0.zip?dl=0)|
68 | | 1      | [400k](https://www.dropbox.com/s/q133hrwyyvudl65/exp1.zip?dl=0)|
69 | | 2      | [400k](https://www.dropbox.com/s/vaz0tb5l8gwfvd0/exp2.zip?dl=0)|
70 | | 3| [400k](https://www.dropbox.com/s/iy7v2zzqguw1q18/exp3.zip?dl=0)|
71 | |4 | [400k](https://www.dropbox.com/s/qtxiss3jk0hjbap/exp4.zip?dl=0)|
72 | 
73 | ## Notes
74 | 
75 |   * Refer to [this](https://github.com/Kyubyong/kss/blob/master/graph2pron_statistics.md), which is provided by Hyungjun So.
76 | 


--------------------------------------------------------------------------------
/data_load.py:
--------------------------------------------------------------------------------
  1 | #-*- coding: utf-8 -*-
  2 | #/usr/bin/python2
  3 | '''
  4 | By kyubyong park. kbpark.linguist@gmail.com. 
  5 | https://www.github.com/kyubyong/kss
  6 | '''
  7 | 
  8 | from __future__ import print_function
  9 | 
 10 | from hyperparams import Hyperparams as hp
 11 | import numpy as np
 12 | import tensorflow as tf
 13 | from utils import *
 14 | import codecs
 15 | import re
 16 | import os
 17 | import unicodedata
 18 | from itertools import chain
 19 | from g2p import runKoG2P
 20 | 
 21 | 
 22 | def load_vocab():
 23 |     char2idx = {char: idx for idx, char in enumerate(hp.vocab)}
 24 |     idx2char = {idx: char for idx, char in enumerate(hp.vocab)}
 25 |     return char2idx, idx2char
 26 | 
 27 | def load_data(mode="train"):
 28 |     '''Loads data
 29 |       Args:
 30 |           mode: "train" or "synthesize".
 31 |     '''
 32 |     # Load vocabulary
 33 |     char2idx, idx2char = load_vocab()
 34 | 
 35 |     # load conversion dictionaries
 36 |     j2hcj, j2sj, j2shcj = load_j2hcj(), load_j2sj(), load_j2shcj()
 37 | 
 38 |     if mode=="train":
 39 |         # Parse
 40 |         fpaths, text_lengths, texts = [], [], []
 41 |         transcript = os.path.join(hp.data, 'transcript.v.1.1.txt')
 42 |         lines = codecs.open(transcript, 'r', 'utf-8').readlines()
 43 |         for line in lines:
 44 |             fname, _, expanded, text, _ = line.strip().split("|")
 45 | 
 46 |             fpath = os.path.join(hp.data, fname)
 47 |             fpaths.append(fpath)
 48 | 
 49 |             if hp.num_exp==0:
 50 |                 text = expanded + u"␃"  # ␃: EOS
 51 |                 text = runKoG2P(text, "rulebook.txt")
 52 |             else:
 53 |                 text += u"␃"  # ␃: EOS
 54 |                 if hp.num_exp==2:
 55 |                     text = [j2hcj[char] for char in text]
 56 |                 elif hp.num_exp==3:
 57 |                     text = [j2sj[char] for char in text]
 58 |                 elif hp.num_exp==4:
 59 |                     text = [j2shcj[char] for char in text]
 60 |                 text = chain.from_iterable(text)
 61 | 
 62 |             text = [char2idx[char] for char in text]
 63 |             text_lengths.append(len(text))
 64 |             texts.append(np.array(text, np.int32).tostring())
 65 | 
 66 |         return fpaths, text_lengths, texts
 67 |     else: # synthesize on unseen test text.
 68 |         # Parse
 69 |         def _normalize(line):
 70 |             _, expanded, text = line.strip().split("|")
 71 | 
 72 |             if hp.num_exp==0:
 73 |                 text = expanded + u"␃"  # ␃: EOS
 74 |                 text = runKoG2P(text, "rulebook.txt")
 75 |             else:
 76 |                 text += u"␃"
 77 |                 if hp.num_exp==2:
 78 |                     text = [j2hcj[char] for char in text]
 79 |                 elif hp.num_exp==3:
 80 |                     text = [j2sj[char] for char in text]
 81 |                 elif hp.num_exp==4:
 82 |                     text = [j2shcj[char] for char in text]
 83 |                 text = chain.from_iterable(text)
 84 |             text = [char2idx[char] for char in text]
 85 |             return text
 86 | 
 87 |         lines = codecs.open(hp.test_data, 'r', 'utf8').read().splitlines()
 88 |         sents = [_normalize(line) for line in lines[1:]]
 89 |         texts = np.zeros((len(sents), hp.max_N), np.int32)
 90 |         for i, sent in enumerate(sents):
 91 |             texts[i, :len(sent)] = sent
 92 |         return texts
 93 | 
 94 | def get_batch():
 95 |     """Loads training data and put them in queues"""
 96 |     with tf.device('/cpu:0'):
 97 |         # Load data
 98 |         fpaths, text_lengths, texts = load_data() # list
 99 |         maxlen, minlen = max(text_lengths), min(text_lengths)
100 | 
101 |         # Calc total batch count
102 |         num_batch = len(fpaths) // hp.B
103 | 
104 |         # Create Queues
105 |         fpath, text_length, text = tf.train.slice_input_producer([fpaths, text_lengths, texts], shuffle=True)
106 | 
107 |         # Parse
108 |         text = tf.decode_raw(text, tf.int32)  # (None,)
109 | 
110 |         def _load_spectrograms(fpath):
111 |             fname = os.path.basename(fpath)
112 |             mel = "/data/private/kss/dc_tts/mels/{}".format(fname.replace("wav", "npy"))
113 |             mag = "/data/private/kss/dc_tts/mags/{}".format(fname.replace("wav", "npy"))
114 |             return fname, np.load(mel), np.load(mag)
115 | 
116 |         fname, mel, mag = tf.py_func(_load_spectrograms, [fpath], [tf.string, tf.float32, tf.float32])
117 | 
118 |         # Add shape information
119 |         fname.set_shape(())
120 |         text.set_shape((None,))
121 |         mel.set_shape((None, hp.n_mels))
122 |         mag.set_shape((None, hp.n_fft//2+1))
123 | 
124 |         # Batching
125 |         _, (texts, mels, mags, fnames) = tf.contrib.training.bucket_by_sequence_length(
126 |                                             input_length=text_length,
127 |                                             tensors=[text, mel, mag, fname],
128 |                                             batch_size=hp.B,
129 |                                             bucket_boundaries=[i for i in range(minlen + 1, maxlen - 1, 20)],
130 |                                             num_threads=8,
131 |                                             capacity=hp.B*4,
132 |                                             dynamic_pad=True)
133 | 
134 |     return texts, mels, mags, fnames, num_batch
135 | 
136 | 


--------------------------------------------------------------------------------
/g2p.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | '''
  3 | This is mostly adapted from https://github.com/scarletcho/KoG2P.
  4 | g2p.py
  5 | ~~~~~~~~~~
  6 | This script converts Korean graphemes to romanized phones and then to pronunciation.
  7 |     (1) graph2phone: convert Korean graphemes to romanized phones
  8 |     (2) phone2prono: convert romanized phones to pronunciation
  9 |     (3) graph2phone: convert Korean graphemes to pronunciation
 10 | Usage:  $ python g2p.py '스물 여덟째 사람'
 11 |         (NB. Please check 'rulebook_path' before usage.)
 12 | Yejin Cho (scarletcho@gmail.com)
 13 | Jaegu Kang (jaekoo.jk@gmail.com)
 14 | Hyungwon Yang (hyung8758@gmail.com)
 15 | Yeonjung Hong (yvonne.yj.hong@gmail.com)
 16 | Created: 2016-08-11
 17 | Last updated: 2017-02-22 Yejin Cho
 18 | * Key updates made:
 19 |     - Executable in both Python 2 and 3.
 20 |     - G2P Performance test available ($ python g2p.py test)
 21 |     - G2P verbosity control available
 22 | '''
 23 | 
 24 | import datetime as dt
 25 | import re
 26 | import math
 27 | import sys
 28 | import optparse
 29 | 
 30 | # Option
 31 | parser = optparse.OptionParser()
 32 | parser.add_option("-v", action="store_true", dest="verbose", default="False",
 33 |                   help="This option prints the detail information of g2p process.")
 34 | 
 35 | (options, args) = parser.parse_args()
 36 | verbose = options.verbose
 37 | 
 38 | # Check Python version
 39 | ver_info = sys.version_info
 40 | 
 41 | if ver_info[0] == 2:
 42 |     reload(sys)
 43 |     sys.setdefaultencoding('utf-8')
 44 | 
 45 | 
 46 | def readfileUTF8(fname):
 47 |     f = open(fname, 'r')
 48 |     corpus = []
 49 | 
 50 |     while True:
 51 |         line = f.readline()
 52 |         line = line.encode("utf-8")
 53 |         line = re.sub(u'\n', u'', line)
 54 |         if line != u'':
 55 |             corpus.append(line)
 56 |         if not line: break
 57 | 
 58 |     f.close()
 59 |     return corpus
 60 | 
 61 | 
 62 | def writefile(body, fname):
 63 |     out = open(fname, 'w')
 64 |     for line in body:
 65 |         out.write('{}\n'.format(line))
 66 |     out.close()
 67 | 
 68 | 
 69 | def readRules(pver, rule_book):
 70 |     if pver == 2:
 71 |         f = open(rule_book, 'r')
 72 |     elif pver == 3:
 73 |         f = open(rule_book, 'r', encoding="utf-8")
 74 | 
 75 |     rule_in = []
 76 |     rule_out = []
 77 | 
 78 |     while True:
 79 |         line = f.readline()
 80 |         if pver == 2:
 81 |             line = unicode(line.encode("utf-8"))
 82 |             line = re.sub(u'\n', u'', line)
 83 |         elif pver == 3:
 84 |             line = re.sub('\n', '', line)
 85 | 
 86 |         if line != u'':
 87 |             if line[0] != u'#':
 88 |                 # print(line)
 89 |                 IOlist = line.split('\t')
 90 |                 rule_in.append(IOlist[0])
 91 |                 if IOlist[1]:
 92 |                     rule_out.append(IOlist[1])
 93 |                 else:  # If output is empty (i.e. deletion rule)
 94 |                     rule_out.append(u'')
 95 |         if not line: break
 96 |     f.close()
 97 | 
 98 |     return rule_in, rule_out
 99 | 
100 | 
101 | def isHangul(charint):
102 |     hangul_init = 44032
103 |     hangul_fin = 55203
104 |     return charint >= hangul_init and charint <= hangul_fin
105 | 
106 | 
107 | def checkCharType(var_list):
108 |     #  1: whitespace
109 |     #  0: hangul
110 |     # -1: non-hangul
111 |     checked = []
112 |     for i in range(len(var_list)):
113 |         if var_list[i] == 32:  # whitespace
114 |             checked.append(1)
115 |         elif isHangul(var_list[i]):  # Hangul character
116 |             checked.append(0)
117 |         else:  # Non-hangul character
118 |             checked.append(-1)
119 |     return checked
120 | 
121 | 
122 | def graph2phone(graphs):
123 |     # Encode graphemes as utf8
124 |     try:
125 |         graphs = graphs.decode('utf8')
126 |     except AttributeError:
127 |         pass
128 | 
129 |     integers = []
130 |     for i in range(len(graphs)):
131 |         integers.append(ord(graphs[i]))
132 | 
133 |     # Romanization (according to Korean Spontaneous Speech corpus; 성인자유발화코퍼스)
134 |     phones = ''
135 |     ONS = ['k0', 'kk', 'nn', 't0', 'tt', 'rr', 'mm', 'p0', 'pp',
136 |            's0', 'ss', 'oh', 'c0', 'cc', 'ch', 'kh', 'th', 'ph', 'h0']
137 |     NUC = ['aa', 'qq', 'ya', 'yq', 'vv', 'ee', 'yv', 'ye', 'oo', 'wa',
138 |            'wq', 'wo', 'yo', 'uu', 'wv', 'we', 'wi', 'yu', 'xx', 'xi', 'ii']
139 |     COD = ['', 'kf', 'kk', 'ks', 'nf', 'nc', 'nh', 'tf',
140 |            'll', 'lk', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh',
141 |            'mf', 'pf', 'ps', 's0', 'ss', 'oh', 'c0', 'ch',
142 |            'kh', 'th', 'ph', 'h0']
143 | 
144 |     # Pronunciation
145 |     idx = checkCharType(integers)
146 |     iElement = 0
147 |     while iElement < len(integers):
148 |         if idx[iElement] == 0:  # not space characters
149 |             base = 44032
150 |             df = int(integers[iElement]) - base
151 |             iONS = int(math.floor(df / 588)) + 1
152 |             iNUC = int(math.floor((df % 588) / 28)) + 1
153 |             iCOD = int((df % 588) % 28) + 1
154 | 
155 |             s1 = '@' + ONS[iONS - 1]  # onset
156 |             s2 = NUC[iNUC - 1]  # nucleus
157 | 
158 |             if COD[iCOD - 1]:  # coda
159 |                 s3 = COD[iCOD - 1]
160 |             else:
161 |                 s3 = ''
162 |             tmp = "`" + s1 + "`" + s2 + "`" + s3 + "`"
163 |             phones = phones + tmp
164 | 
165 |         elif idx[iElement] == 1:  # space character
166 |             tmp = '`#`'
167 |             phones = phones + tmp
168 | 
169 |         else:  # non-Hangul
170 |             phones += "`" + unichr(integers[iElement]) + "`"
171 | 
172 |         iElement += 1
173 |         tmp = ''
174 | 
175 |     # Collapse syllable delimiters (`).
176 |     phones = re.sub("`+", "`", phones)
177 | 
178 |     # 초성 이응 삭제
179 |     phones = phones.replace("`@oh`", "`@")
180 | 
181 |     # 받침 이응 'ng'으로 처리 (Velar nasal in coda position)
182 |     # print(phones)
183 |     phones = phones.replace("oh`@", "ng`@")
184 |     # print(phones,"===")
185 |     phones = phones.replace("oh`#", "ng`#")
186 |     phones = re.sub('oh`$', 'ng`', phones)
187 | 
188 | 
189 |     return phones
190 | 
191 | 
192 | def phone2prono(phones, rule_in, rule_out):
193 |     # Apply g2p rules
194 |     for pattern, replacement in zip(rule_in, rule_out):
195 |         _phones = phones
196 |         phones = re.sub(pattern, replacement, phones)
197 |         # if _phones != phones:
198 |         #     print(_phones, "->", phones)
199 |         #     print("::", pattern, "->", replacement)
200 |         prono = phones
201 |     return prono
202 | 
203 | 
204 | def graph2prono(graphs, rule_in, rule_out):
205 |     romanized = graph2phone(graphs)
206 |     prono = phone2prono(romanized, rule_in, rule_out)
207 | 
208 |     prono = re.sub(u'`', u' ', prono)
209 |     prono = re.sub(u' $', u'', prono)
210 |     # prono = re.sub(u'#', u'@', prono)
211 |     prono = re.sub(u'@+', u'@', prono)
212 | 
213 |     prono_prev = prono
214 |     identical = False
215 |     loop_cnt = 1
216 | 
217 |     # if verbose == True:
218 |     # print('=> Romanized: ' + romanized)
219 |     # print('=> Initial output: ' + prono)
220 | 
221 |     while not identical:
222 |         prono_new = phone2prono(re.sub(u' ', u'`', prono_prev + u'`'), rule_in, rule_out)
223 |         prono_new = re.sub(u'`', u' ', prono_new)
224 |         prono_new = re.sub(u' $', u'', prono_new)
225 | 
226 |         if re.sub(u'@', u'', prono_prev) == re.sub(u'@', u'', prono_new):
227 |             identical = True
228 |             prono_new = re.sub(u'@', u'', prono_new)
229 |             # if verbose == True:
230 |             # print('\n=> Exhaustive rule application completed!')
231 |             # print('=> Total loop count: ' + str(loop_cnt))
232 |             # print('=> Output: ' + prono_new)
233 |         else:
234 |             # if verbose == True:
235 |             # print('\n=> Rule applied for more than once')
236 |             # print('cmp1: ' + re.sub(u'@', u'', prono_prev))
237 |             # print('cmp2: ' + re.sub(u'@', u'', prono_new))
238 |             loop_cnt += 1
239 |             prono_prev = prono_new
240 | 
241 |     # prono_new = prono_new.replace("@", "")
242 |     # prono_new = prono_new.strip("`")
243 |     # prono_new = prono_new.replace("`#`", " ")
244 |     # print("prnono_new::", prono_new)
245 |     prono_new = prono_new.strip()
246 | 
247 |     return prono_new
248 | 
249 | 
250 | def runKoG2P(graph, rulebook):
251 |     [rule_in, rule_out] = readRules(ver_info[0], rulebook)
252 |     if ver_info[0] == 2:
253 |         prono = graph2prono(unicode(graph), rule_in, rule_out)
254 |     elif ver_info[0] == 3:
255 |         prono = graph2prono(graph, rule_in, rule_out)
256 | 
257 |     phones = [phone.replace("#", " ") for phone in prono.split()]
258 | 
259 |     return phones
260 | 
261 | 
262 | # Usage:
263 | if __name__ == '__main__':
264 |     graph = args[0]
265 |     phonemes = runKoG2P(graph, 'rulebook.txt')
266 |     # print(phonemes)
267 | 
268 | 


--------------------------------------------------------------------------------
/graph2pron_statistics.md:
--------------------------------------------------------------------------------
 1 | ### Symbolic comparison statistics based on natural text
 2 | 
 3 | #### Basis of phonetic representation
 4 | The ground truth should be the acoustic measure of "typical sound" - 'average' pronunciation of all Korean speaker, which is difficult to obtain. Instead, the "standard pronunciation symbol" which is a pivot or a guideline of standard pronunciation is used as symbolic counterpart of a ground truth - Every person have different phonetic character, but at least they (usually) speak as the same way as tagged by standard pronunciation. For example, 허구연, a famous baseball caster, speaks strong dialect that pronounces 임꺽정 as [임끅쯩(이)] but still pronounces the same rule as 걱정 like [극쯩].
 5 | 
 6 | Note that it is debateful that the standard pronunciation symbol is equivalent to the phone, since there are several implicit phonetic rules that is not represented on the standard pronunciation symbol. For example, voicing rule of voiceless consonants (ㄱ, ㄷ, ㅂ, ㅅ, ㅈ) allows those consonants to have their voiced counterpart, but never being voiced in the front of sound cluster. Moreover, the null consonant (ㅇ) at the very start of the sound cluster sounds as glottal plosive ([ʔ]) or several exceptional cases (e. g., Roman alphabet E). All of these modification to consonants cannot be distinguished in the standard pronunciation symbol although being strictly different phonetic character.
 7 | 
 8 | In a usual input sequence of machine learning, whole sequences are fed into input. However, it is impossible to span whole language sequences as a probability basis, the input sequence should be splitted into a certain way to use as a statistical basis. Using the virtue of 'forced vowelization(?)',phonetic characteristics of Korean language, every fragment of a sound is assigned by a vowel. Therefore, for a given sequence of any length, number of vowels are equal for any representation under the same vowel set. A typical choice would be assigning a sound basis equivalent as a grapheme ([Consonant, Vowel, Coda]) pair. On the other hand, another basis are suggested to less suffer on the modification rule across two subsequent graphemes. A "triphone" basis spans between vowels (since vowels are less affected by modification); as [Vowel, Coda, Consonant, Vowel] pair.
 9 | 
10 | Using two basis, we split a sample text into a basis sequences and compare the correpondence (or, degree of confusion) compared to corresponding pairs by standard pronunciation symbol. A degree of confusion is computed as sum of partition entropy; 0 if all of basis `$A_i$` is correponds to `$B_j$` (one-to-one correspondence). Suppose a number `$n$` of base (a [CnVoCo] or [VoCoCnVo]) is in a `$A_i$` and in standard pronunciation `$B_j$`, we connect with a network `$N_{ij} = n$`. Then the confusion of A based on B is represented as
11 | 
12 | `$C(A|B) = \sum_{i} p_{i} \sum_{j} -ln p_{j|i}$`
13 | 
14 | where `$p_{i} = \sum_j N_{ij} / \sum_{ij} N_{ij}$` and `$p_{j|i} = N_{ij}/\sum_i N_j$`. Note that this is asymmetric measure of A and B.
15 | 
16 | The confusion will be calculated in the way given by 4 experiments; but in the triphone basis only exp.1 and exp.4 are distinguished. 
17 | 
18 | #### Dataset
19 | The sample dataset of Korean natural text was concatenation of four text sets: Korean translation of the short novel "The Black Cat" by Edgar Allan Poe, the Korean short novel "운수 좋은 날" by 현진건, kss dataset scripts and a private stt script set. Only texts and spaces are included, since only independent pronunciation is considered. Note that in this condition, phonetic modification such as liaison is ignored.
20 | 
21 | #### Result and discussions
22 | ##### Triphone, C(StdPrn|Graph), with exp.4 basis (Consonant cluster 
23 | Overall confusion: 0.003859046558360724
24 | Top-10 most confused base
25 | ('ㅏ', 'ㄹ', 'ㅇ', 'ㅣ') : ([(('aa', 'll', 'rr', 'ii'), 36), (('aa', ' ', 'rr', 'ii'), 666)])
26 | ('ㅏ', 'ㄱ', 'ㄱ', 'ㅏ') : ([(('aa', 'kf', 'kk', 'aa'), 48), (('aa', ' ', 'kk', 'aa'), 215)])
27 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅏ') : ([(('ii', 'nf', 'kk', 'aa'), 46), (('ii', 'nf', 'k0', 'aa'), 197)])
28 | ('ㅣ', 'ㄴ', 'ㄷ', 'ㅏ') : ([(('ii', 'nf', 'tt', 'aa'), 37), (('ii', 'nf', 't0', 'aa'), 263)])
29 | ('ㅣ', 'ㄴ', 'ㅈ', 'ㅣ') : ([(('ii', 'nf', 'c0', 'ii'), 155), (('ii', 'nf', 'cc', 'ii'), 47)])
30 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅗ') : ([(('ii', 'nf', 'kk', 'oo'), 145), (('ii', 'nf', 'k0', 'oo'), 35)])
31 | ('ㅣ', 'ㄱ', 'ㄱ', 'ㅏ') : ([(('ii', 'kf', 'kk', 'aa'), 15), (('ii', ' ', 'kk', 'aa'), 714)])
32 | ('ㅏ', 'ㄹ', 'ㄱ', 'ㅔ') : ([(('aa', 'll', 'kk', 'ee'), 131), (('aa', 'll', 'k0', 'ee'), 23)])
33 | ('ㅏ', 'ㄹ', 'ㅈ', 'ㅣ') : ([(('aa', 'll', 'cc', 'ii'), 41), (('aa', 'll', 'c0', 'ii'), 46)])
34 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅣ') : ([(('ii', 'nf', 'kk', 'ii'), 20), (('ii', 'nf', 'k0', 'ii'), 111)])
35 | 
36 | ##### Triphone, C(StdPrn|Graph), with exp.1 basis
37 | Overall confusion: 0.002684402501353149
38 | Top-10 most confused base
39 | ('ㅏ', 'ㄹ', 'ㅇ', 'ㅣ') : ([(('aa', 'll', 'rr', 'ii'), 36), (('aa', ' ', 'rr', 'ii'), 666)])
40 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅏ') : ([(('ii', 'nf', 'kk', 'aa'), 46), (('ii', 'nf', 'k0', 'aa'), 197)])
41 | ('ㅣ', 'ㄴ', 'ㄷ', 'ㅏ') : ([(('ii', 'nf', 't0', 'aa'), 263), (('ii', 'nf', 'tt', 'aa'), 37)])
42 | ('ㅣ', 'ㄴ', 'ㅈ', 'ㅣ') : ([(('ii', 'nf', 'cc', 'ii'), 47), (('ii', 'nf', 'c0', 'ii'), 155)])
43 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅗ') : ([(('ii', 'nf', 'k0', 'oo'), 35), (('ii', 'nf', 'kk', 'oo'), 145)])
44 | ('ㅏ', 'ㄹ', 'ㄱ', 'ㅔ') : ([(('aa', 'll', 'k0', 'ee'), 23), (('aa', 'll', 'kk', 'ee'), 131)])
45 | ('ㅏ', 'ㄹ', 'ㅈ', 'ㅣ') : ([(('aa', 'll', 'cc', 'ii'), 41), (('aa', 'll', 'c0', 'ii'), 46)])
46 | ('ㅣ', 'ㄴ', 'ㄱ', 'ㅣ') : ([(('ii', 'nf', 'k0', 'ii'), 111), (('ii', 'nf', 'kk', 'ii'), 20)])
47 | ('ㅏ', 'ㄹ', 'ㄱ', 'ㅓ') : ([(('aa', 'll', 'k0', 'vv'), 22), (('aa', 'll', 'kk', 'vv'), 64)])
48 | ('ㅡ', 'ㅁ', 'ㄷ', 'ㅏ') : ([(('xx', 'mf', 't0', 'aa'), 48), (('xx', 'mf', 'tt', 'aa'), 26)])
49 | 
50 | ##### Grapheme, C(StdPrn|Graph)
51 | Overall confusion: 0.3983152605973124
52 | top-10 most confused base
53 | ('ㅇ', 'ㅣ', ' ') : ([(('ch', 'ii', ' '), 389), (('c0', 'ii', ' '), 19), (('ss', 'ii', ' '), 157), (('rr', 'ii', ' '), 2419), (('p0', 'ii', ' '), 456), (('oh', 'ii', ' '), 11308), (('mm', 'ii', ' '), 1865), (('k0', 'ii', ' '), 1589), (('nn', 'ii', ' '), 2853), (('kk', 'ii', ' '), 29), (('s0', 'ii', ' '), 441), (('ph', 'ii', ' '), 44)])
54 | ('ㅇ', 'ㅓ', ' ') : ([(('ss', 'vv', ' '), 6860), (('k0', 'vv', ' '), 553), (('ph', 'vv', ' '), 770), (('c0', 'vv', ' '), 83), (('mm', 'vv', ' '), 98), (('t0', 'vv', ' '), 64), (('nn', 'vv', ' '), 124), (('kk', 'vv', ' '), 11), (('s0', 'vv', ' '), 62), (('rr', 'vv', ' '), 2720), (('p0', 'vv', ' '), 83), (('oh', 'vv', ' '), 4290), (('th', 'vv', ' '), 26)])
55 | ('ㅇ', 'ㅔ', ' ') : ([(('mm', 'ee', ' '), 921), (('k0', 'ee', ' '), 807), (('nn', 'ee', ' '), 1999), (('kk', 'ee', ' '), 114), (('ph', 'ee', ' '), 120), (('p0', 'ee', ' '), 405), (('c0', 'ee', ' '), 20), (('kh', 'ee', ' '), 4), (('oh', 'ee', ' '), 5356), (('th', 'ee', ' '), 56), (('ss', 'ee', ' '), 1), (('rr', 'ee', ' '), 1114), (('s0', 'ee', ' '), 95), (('ch', 'ee', ' '), 4)])
56 | ('ㅇ', 'ㅡ', 'ㄹ') : ([(('rr', 'xx', 'll'), 1278), (('oh', 'xx', 'll'), 1367), (('kk', 'xx', 'll'), 11), (('t0', 'xx', 'll'), 64), (('ph', 'xx', 'll'), 30), (('k0', 'xx', 'll'), 1044), (('nn', 'xx', 'll'), 1153), (('s0', 'xx', ' '), 1), (('th', 'xx', 'll'), 14), (('s0', 'xx', 'll'), 240), (('oh', 'xx', ' '), 60), (('c0', 'xx', 'll'), 75), (('p0', 'xx', 'll'), 344), (('ss', 'xx', 'll'), 477), (('ch', 'xx', 'll'), 11), (('mm', 'xx', 'll'), 841)])
57 | ('ㅇ', 'ㅡ', 'ㄴ') : ([(('rr', 'xx', 'nf'), 1083), (('k0', 'xx', 'nf'), 710), (('rr', 'xx', ' '), 1), (('kk', 'xx', 'nf'), 20), (('ph', 'xx', 'nf'), 206), (('nn', 'xx', ' '), 1), (('th', 'xx', 'nf'), 224), (('mm', 'xx', 'nf'), 886), (('s0', 'xx', 'nf'), 238), (('oh', 'xx', ' '), 23), (('oh', 'xx', 'nf'), 1206), (('nn', 'xx', 'nf'), 905), (('mm', 'xx', ' '), 4), (('k0', 'xx', ' '), 2), (('ch', 'xx', 'nf'), 11), (('c0', 'xx', 'nf'), 54), (('t0', 'xx', 'nf'), 33), (('ss', 'xx', 'nf'), 5), (('p0', 'xx', 'nf'), 241)])
58 | ('ㅇ', 'ㅏ', ' ') : ([(('mm', 'aa', ' '), 92), (('nn', 'aa', ' '), 878), (('rr', 'aa', ' '), 801), (('th', 'aa', ' '), 560), (('oh', 'aa', ' '), 4906), (('p0', 'aa', ' '), 94), (('c0', 'aa', ' '), 789), (('k0', 'aa', ' '), 59), (('t0', 'aa', ' '), 197), (('ch', 'aa', ' '), 5), (('kk', 'aa', ' '), 27), (('s0', 'aa', ' '), 5), (('ph', 'aa', ' '), 38)])
59 | ('ㄷ', 'ㅏ', ' ') : ([(('th', 'aa', ' '), 256), (('tt', 'aa', ' '), 3486), (('t0', 'aa', ' '), 7884)])
60 | ('ㅇ', 'ㅡ', ' ') : ([(('ch', 'xx', ' '), 3), (('th', 'xx', ' '), 36), (('ss', 'xx', ' '), 380), (('nn', 'xx', ' '), 354), (('rr', 'xx', ' '), 63), (('p0', 'xx', ' '), 130), (('s0', 'xx', ' '), 87), (('c0', 'xx', ' '), 60), (('mm', 'xx', ' '), 167), (('k0', 'xx', ' '), 717), (('oh', 'xx', ' '), 425), (('kk', 'xx', ' '), 27), (('t0', 'xx', ' '), 91), (('ph', 'xx', ' '), 75)])
61 | ('ㅇ', 'ㅣ', 'ㅆ') : ([(('k0', 'ii', 'nf'), 4), (('s0', 'ii', 'tf'), 29), (('nn', 'ii', 'nf'), 2), (('c0', 'ii', ' '), 1), (('rr', 'ii', 'nf'), 5), (('rr', 'ii', ' '), 4), (('t0', 'ii', 'tf'), 7), (('s0', 'ii', ' '), 47), (('s0', 'ii', 'nf'), 50), (('mm', 'ii', 'nf'), 5), (('nn', 'ii', ' '), 9), (('k0', 'ii', ' '), 4), (('oh', 'ii', ' '), 2238), (('oh', 'ii', 'tf'), 1139), (('oh', 'ii', 'nf'), 847), (('nn', 'ii', 'tf'), 2), (('k0', 'ii', 'tf'), 1), (('mm', 'ii', ' '), 1)])
62 | ('ㅇ', 'ㅣ', 'ㄹ') : ([(('rr', 'ii', 'll'), 105), (('oh', 'ii', 'll'), 2441), (('kk', 'ii', 'll'), 1), (('ch', 'ii', ' '), 3), (('ph', 'ii', 'll'), 1), (('k0', 'ii', 'll'), 49), (('s0', 'ii', 'll'), 10), (('rr', 'ii', ' '), 33), (('nn', 'ii', ' '), 49), (('oh', 'ii', ' '), 1184), (('nn', 'ii', 'll'), 63), (('mm', 'ii', ' '), 31), (('k0', 'ii', ' '), 13), (('mm', 'ii', 'll'), 36), (('p0', 'ii', ' '), 21), (('ch', 'ii', 'll'), 2), (('p0', 'ii', 'll'), 31)])
63 | 
64 | ##### Discussion
65 | Note that in bare grapheme basis has a lot of confusion especially in consonant ㅇ. This is largely cured by adopting triphone basis which mostly maps coda+consonant ㅇ into a single standard pronunciation. While, difference between experiment basis 1 and 4 comes from the "loss of the positional information of consonant (between coda and consonant)" which brings confusion. Separating consonant cluster with preserving positional information give almost the confusion as exp.1 (data not included).
66 | 
67 | The best case, which fully uses Hangul Jamo (0x01100-0x011FF) with consonant clusters still have some confusion; From the examples we can find out that the confusion is due to fortis-ification(?) (경음화). The same VCCV pair may pronounce either fortis(경음) or lenis(연음) due to complex rule called 경음화. Most of the 경음화 rules are defined by phonetic basis (e.g., ㅜ+ㄱ+ㅂ+ㅏ -> ㅜ+ㄱ+ㅃ+ㅏ), still a large portion of rule depends on the linguistical context. (e.g., POS, fundamental form of the word, ...) For detailed information of 경음화, please refer to [this link](https://www.korean.go.kr/front/page/pageView.do?page_id=P000102&mn_id=95
68 | 


--------------------------------------------------------------------------------
/hyperparams.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #/usr/bin/python2
 3 | '''
 4 | By kyubyong park. kbpark.linguist@gmail.com. 
 5 | https://www.github.com/kyubyong/kss
 6 | 
 7 | Compare speech synthesis performance depending on different text processing strategies.
 8 | 0: Hangul Jamo (0x01100-0x011FF) with G2P conversion
 9 | 1: Hangul Jamo (0x01100-0x011FF)
10 | 2: Hangul Compatibility Jamo (0x03130-0x0318F)
11 | 3: Hangul Jamo (0x01100-0x011FF). Single consonants only.
12 | 4: Hangul Compatibility Jamo (0x03130-0x0318F). Single consonants only.
13 | '''
14 | class Hyperparams:
15 |     '''Hyper parameters'''
16 |     num_exp = 0
17 | 
18 |     # signal processing
19 |     sr = 22050  # Sampling rate.
20 |     n_fft = 2048  # fft points (samples)
21 |     frame_shift = 0.0125  # seconds
22 |     frame_length = 0.05  # seconds
23 |     hop_length = int(sr * frame_shift)  # samples. =276.
24 |     win_length = int(sr * frame_length)  # samples. =1102.
25 |     n_mels = 80  # Number of Mel banks to generate
26 |     power = 1.5  # Exponent for amplifying the predicted magnitude
27 |     n_iter = 50  # Number of inversion iterations
28 |     preemphasis = .97
29 |     max_db = 100
30 |     ref_db = 20
31 | 
32 |     # Model
33 |     r = 4 # Reduction factor. Do not change this.
34 |     dropout_rate = 0.05
35 |     e = 128 # == embedding
36 |     d = 256 # == hidden units of Text2Mel
37 |     c = 512 # == hidden units of SSRN
38 |     attention_win_size = 3
39 | 
40 |     # data
41 |     data = "/data/public/rw/datasets/CSS10/ko"
42 |     test_data = "ko.txt"
43 | 
44 |     if num_exp == 0:
45 |         vocab = [u"␀", u"␃", " ", "!", ",", ".", "?", 'aa', 'c0', 'cc', 'ch', 'ee', 'h0', 'ii', 'k0', 'kf', 'kh', 'kk', 'ks', 'lb', 'lh', 'lk', 'll', 'lm', 'lp',
46 |          'ls', 'lt', 'mf', 'mm', 'nc', 'nf', 'nh', 'nn', 'ng', 'oh', 'oo', 'p0', 'pf', 'ph', 'pp', 'ps', 'qq', 'rr', 's0',
47 |          'ss', 't0', 'tf', 'th', 'tt', 'uu', 'vv', 'wa', 'we', 'wi', 'wo', 'wq', 'wv', 'xi', 'xx', 'ya', 'ye', 'yo',
48 |          'yq', 'yu', 'yv']
49 |     elif num_exp == 1:
50 |         vocab = u'''␀␃ !,.?ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆴᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ'''
51 |     elif num_exp == 2:
52 |         vocab = u'''␀␃ !,.?ㄱㄲㄳㄴㄵㄶㄷㄸㄹㄺㄻㄼㄾㅀㅁㅂㅃㅄㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ''' # HCJ
53 |     elif num_exp == 3:
54 |         vocab = u'''␀␃ !,.?ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿᇀᇁᇂ'''
55 |     elif num_exp == 4:
56 |         vocab = u'''␀␃ !,.?ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ''' # HCJ. single consonants only.
57 |     max_N, max_T = 123, 162
58 | 
59 |     # training scheme
60 |     lr = 0.001 # Initial learning rate.
61 |     logdir = "logdir/{}".format(num_exp)
62 |     sampledir = 'samples/{}'.format(num_exp)
63 |     B = 16 # batch size
64 |     num_iterations = 400000
65 | 


--------------------------------------------------------------------------------
/ko.txt:
--------------------------------------------------------------------------------
 1 | Korean Sentences
 2 | 1. 안녕하세요, 저는 카카오 브레이니라고 해요.|안녕하세요, 저는 카카오 브레이니라고 해요.|안녕하세요, 저는 카카오 브레이니라고 해요.
 3 | 2. 저는 2018년 4월 대한민국 판교에서 만들어졌어요.|저는 이천십팔 년 사월 대한민국 판교에서 만들어졌어요.|저는 이천십팔 년 사월 대한민국 판교에서 만들어졌어요.
 4 | 3. 저의 창조자는 일단 라이언이라고 해 두죠.|저의 창조자는 일단 라이언이라고 해 두죠.|저의 창조자는 일단 라이언이라고 해 두죠.
 5 | 4. 오늘 여러분을 이렇게 목소리로 만나게 되어 반갑습니다.|오늘 여러분을 이렇게 목소리로 만나게 되어 반갑습니다.|오늘 여러분을 이렇게 목소리로 만나게 되어 반갑습니다.
 6 | 5. 사실 더 이상 제 소개를 드릴 게 없네요.|사실 더 이상 제 소개를 드릴 게 없네요.|사실 더 이상 제 소개를 드릴 게 없네요.
 7 | 6. 저에 대해 궁금하신 점이 있으세요?|저에 대해 궁금하신 점이 있으세요?|저에 대해 궁금하신 점이 있으세요?
 8 | 7. 혹시 그러시면 저에게 전화해 주세요.|혹시 그러시면 저에게 전화해 주세요.|혹시 그러시면 저에게 전화해 주세요.
 9 | 8. 제 전화번호는 010 1234에 5678이에요.|제 전화번호는 공일공 일이삼사에 오륙칠팔이에요.|제 전화번호는 공일공 일이삼사에 오륙칠팔이에요.
10 | 9. 단, 아침 일찍이나 밤 늦은 시각은 피해 주세요.|단, 아침 일찍이나 밤 늦은 시각은 피해 주세요.|단, 아침 일찍이나 밤 늦은 시각은 피해 주세요.
11 | 10. 저도 사생활이라는 게 있으니까요.|저도 사생활이라는 게 있으니까요.|저도 사생활이라는 게 있으니까요.
12 | 11. 그럼 저는 이만 인사 드릴게요.|그럼 저는 이만 인사 드릴게요.|그럼 저는 이만 인사 드릴게요.
13 | 12. 좋은 시간 되세요.|좋은 시간 되세요.|좋은 시간 되세요.


--------------------------------------------------------------------------------
/modules.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #/usr/bin/python2
  3 | '''
  4 | By kyubyong park. kbpark.linguist@gmail.com. 
  5 | https://www.github.com/kyubyong/multi-speech-corpora/dc_tts
  6 | '''
  7 | 
  8 | from __future__ import print_function, division
  9 | 
 10 | import tensorflow as tf
 11 | 
 12 | 
 13 | def embed(inputs, vocab_size, num_units, zero_pad=True, scope="embedding", reuse=None):
 14 |     '''Embeds a given tensor. 
 15 |     
 16 |     Args:
 17 |       inputs: A `Tensor` with type `int32` or `int64` containing the ids
 18 |          to be looked up in `lookup table`.
 19 |       vocab_size: An int. Vocabulary size.
 20 |       num_units: An int. Number of embedding hidden units.
 21 |       zero_pad: A boolean. If True, all the values of the fist row (id 0)
 22 |         should be constant zeros.
 23 |       scope: Optional scope for `variable_scope`.  
 24 |       reuse: Boolean, whether to reuse the weights of a previous layer
 25 |         by the same name.
 26 |         
 27 |     Returns:
 28 |       A `Tensor` with one more rank than inputs's. The last dimensionality
 29 |         should be `num_units`.
 30 |     '''
 31 |     with tf.variable_scope(scope, reuse=reuse):
 32 |         lookup_table = tf.get_variable('lookup_table', 
 33 |                                        dtype=tf.float32, 
 34 |                                        shape=[vocab_size, num_units],
 35 |                                        initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
 36 |         if zero_pad:
 37 |             lookup_table = tf.concat((tf.zeros(shape=[1, num_units]), 
 38 |                                       lookup_table[1:, :]), 0)
 39 | 
 40 |         outputs = tf.nn.embedding_lookup(lookup_table, inputs)
 41 | 
 42 |     return outputs
 43 | 
 44 | 
 45 | def normalize(inputs,
 46 |               scope="normalize",
 47 |               reuse=None):
 48 |     '''Applies layer normalization that normalizes along the last axis.
 49 | 
 50 |     Args:
 51 |       inputs: A tensor with 2 or more dimensions, where the first dimension has
 52 |         `batch_size`. The normalization is over the last dimension.
 53 |       scope: Optional scope for `variable_scope`.
 54 |       reuse: Boolean, whether to reuse the weights of a previous layer
 55 |         by the same name.
 56 | 
 57 |     Returns:
 58 |       A tensor with the same shape and data dtype as `inputs`.
 59 |     '''
 60 |     outputs = tf.contrib.layers.layer_norm(inputs,
 61 |                                            begin_norm_axis=-1,
 62 |                                            scope=scope,
 63 |                                            reuse=reuse)
 64 |     return outputs
 65 | 
 66 | 
 67 | def highwaynet(inputs, num_units=None, scope="highwaynet", reuse=None):
 68 |     '''Highway networks, see https://arxiv.org/abs/1505.00387
 69 | 
 70 |     Args:
 71 |       inputs: A 3D tensor of shape [N, T, W].
 72 |       num_units: An int or `None`. Specifies the number of units in the highway layer
 73 |              or uses the input size if `None`.
 74 |       scope: Optional scope for `variable_scope`.
 75 |       reuse: Boolean, whether to reuse the weights of a previous layer
 76 |         by the same name.
 77 | 
 78 |     Returns:
 79 |       A 3D tensor of shape [N, T, W].
 80 |     '''
 81 |     if not num_units:
 82 |         num_units = inputs.get_shape()[-1]
 83 | 
 84 |     with tf.variable_scope(scope, reuse=reuse):
 85 |         H = tf.layers.dense(inputs, units=num_units, activation=tf.nn.relu, name="dense1")
 86 |         T = tf.layers.dense(inputs, units=num_units, activation=tf.nn.sigmoid,
 87 |                             bias_initializer=tf.constant_initializer(-1.0), name="dense2")
 88 |         outputs = H * T + inputs * (1. - T)
 89 |     return outputs
 90 | 
 91 | def conv1d(inputs,
 92 |            filters=None,
 93 |            size=1,
 94 |            rate=1,
 95 |            padding="SAME",
 96 |            dropout_rate=0,
 97 |            use_bias=True,
 98 |            activation_fn=None,
 99 |            training=True,
100 |            scope="conv1d",
101 |            reuse=None):
102 |     '''
103 |     Args:
104 |       inputs: A 3-D tensor with shape of [batch, time, depth].
105 |       filters: An int. Number of outputs (=activation maps)
106 |       size: An int. Filter size.
107 |       rate: An int. Dilation rate.
108 |       padding: Either `same` or `valid` or `causal` (case-insensitive).
109 |       dropout_rate: A float of [0, 1].
110 |       use_bias: A boolean.
111 |       activation_fn: A string.
112 |       training: A boolean. If True, dropout is applied.
113 |       scope: Optional scope for `variable_scope`.
114 |       reuse: Boolean, whether to reuse the weights of a previous layer
115 |         by the same name.
116 | 
117 |     Returns:
118 |       A masked tensor of the same shape and dtypes as `inputs`.
119 |     '''
120 |     with tf.variable_scope(scope):
121 |         if padding.lower() == "causal":
122 |             # pre-padding for causality
123 |             pad_len = (size - 1) * rate  # padding size
124 |             inputs = tf.pad(inputs, [[0, 0], [pad_len, 0], [0, 0]])
125 |             padding = "valid"
126 | 
127 |         if filters is None:
128 |             filters = inputs.get_shape().as_list()[-1]
129 | 
130 |         params = {"inputs": inputs, "filters": filters, "kernel_size": size,
131 |                   "dilation_rate": rate, "padding": padding, "use_bias": use_bias,
132 |                   "kernel_initializer": tf.contrib.layers.variance_scaling_initializer(), "reuse": reuse}
133 | 
134 |         tensor = tf.layers.conv1d(**params)
135 |         tensor = normalize(tensor)
136 |         if activation_fn is not None:
137 |             tensor = activation_fn(tensor)
138 | 
139 |         tensor = tf.layers.dropout(tensor, rate=dropout_rate, training=training)
140 | 
141 |     return tensor
142 | 
143 | def hc(inputs,
144 |        filters=None,
145 |        size=1,
146 |        rate=1,
147 |        padding="SAME",
148 |        dropout_rate=0,
149 |        use_bias=True,
150 |        activation_fn=None,
151 |        training=True,
152 |        scope="hc",
153 |        reuse=None):
154 |     '''
155 |     Args:
156 |       inputs: A 3-D tensor with shape of [batch, time, depth].
157 |       filters: An int. Number of outputs (=activation maps)
158 |       size: An int. Filter size.
159 |       rate: An int. Dilation rate.
160 |       padding: Either `same` or `valid` or `causal` (case-insensitive).
161 |       use_bias: A boolean.
162 |       activation_fn: A string.
163 |       training: A boolean. If True, dropout is applied.
164 |       scope: Optional scope for `variable_scope`.
165 |       reuse: Boolean, whether to reuse the weights of a previous layer
166 |         by the same name.
167 | 
168 |     Returns:
169 |       A masked tensor of the same shape and dtypes as `inputs`.
170 |     '''
171 |     _inputs = inputs
172 |     with tf.variable_scope(scope):
173 |         if padding.lower() == "causal":
174 |             # pre-padding for causality
175 |             pad_len = (size - 1) * rate  # padding size
176 |             inputs = tf.pad(inputs, [[0, 0], [pad_len, 0], [0, 0]])
177 |             padding = "valid"
178 | 
179 |         if filters is None:
180 |             filters = inputs.get_shape().as_list()[-1]
181 | 
182 | 
183 |         params = {"inputs": inputs, "filters": 2*filters, "kernel_size": size,
184 |                   "dilation_rate": rate, "padding": padding, "use_bias": use_bias,
185 |                   "kernel_initializer": tf.contrib.layers.variance_scaling_initializer(), "reuse": reuse}
186 | 
187 |         tensor = tf.layers.conv1d(**params)
188 |         H1, H2 = tf.split(tensor, 2, axis=-1)
189 |         H1 = normalize(H1, scope="H1")
190 |         H2 = normalize(H2, scope="H2")
191 |         H1 = tf.nn.sigmoid(H1, "gate")
192 |         H2 = activation_fn(H2, "info") if activation_fn is not None else H2
193 |         tensor = H1*H2 + (1.-H1)*_inputs
194 | 
195 |         tensor = tf.layers.dropout(tensor, rate=dropout_rate, training=training)
196 | 
197 |     return tensor
198 | 
199 | def conv1d_transpose(inputs,
200 |                      filters=None,
201 |                      size=3,
202 |                      stride=2,
203 |                      padding='same',
204 |                      dropout_rate=0,
205 |                      use_bias=True,
206 |                      activation=None,
207 |                      training=True,
208 |                      scope="conv1d_transpose",
209 |                      reuse=None):
210 |     '''
211 |         Args:
212 |           inputs: A 3-D tensor with shape of [batch, time, depth].
213 |           filters: An int. Number of outputs (=activation maps)
214 |           size: An int. Filter size.
215 |           rate: An int. Dilation rate.
216 |           padding: Either `same` or `valid` or `causal` (case-insensitive).
217 |           dropout_rate: A float of [0, 1].
218 |           use_bias: A boolean.
219 |           activation_fn: A string.
220 |           training: A boolean. If True, dropout is applied.
221 |           scope: Optional scope for `variable_scope`.
222 |           reuse: Boolean, whether to reuse the weights of a previous layer
223 |             by the same name.
224 | 
225 |         Returns:
226 |           A tensor of the shape with [batch, time*2, depth].
227 |         '''
228 |     with tf.variable_scope(scope, reuse=reuse):
229 |         if filters is None:
230 |             filters = inputs.get_shape().as_list()[-1]
231 |         inputs = tf.expand_dims(inputs, 1)
232 |         tensor = tf.layers.conv2d_transpose(inputs,
233 |                                    filters=filters,
234 |                                    kernel_size=(1, size),
235 |                                    strides=(1, stride),
236 |                                    padding=padding,
237 |                                    activation=None,
238 |                                    kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
239 |                                    use_bias=use_bias)
240 |         tensor = tf.squeeze(tensor, 1)
241 |         tensor = normalize(tensor)
242 |         if activation is not None:
243 |             tensor = activation(tensor)
244 | 
245 |         tensor = tf.layers.dropout(tensor, rate=dropout_rate, training=training)
246 | 
247 |     return tensor
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 


--------------------------------------------------------------------------------
/networks.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #/usr/bin/python2
  3 | '''
  4 | By kyubyong park. kbpark.linguist@gmail.com. 
  5 | https://www.github.com/kyubyong/multi-speech-corpora/dc_tts
  6 | '''
  7 | 
  8 | from __future__ import print_function
  9 | 
 10 | from hyperparams import Hyperparams as hp
 11 | from modules import *
 12 | import tensorflow as tf
 13 | 
 14 | def TextEnc(L, training=True):
 15 |     '''
 16 |     Args:
 17 |       L: Text inputs. (B, N)
 18 | 
 19 |     Return:
 20 |         K: Keys. (B, N, d)
 21 |         V: Values. (B, N, d)
 22 |     '''
 23 |     i = 1
 24 |     tensor = embed(L,
 25 |                    vocab_size=len(hp.vocab),
 26 |                    num_units=hp.e,
 27 |                    scope="embed_{}".format(i)); i += 1
 28 |     tensor = conv1d(tensor,
 29 |                     filters=2*hp.d,
 30 |                     size=1,
 31 |                     rate=1,
 32 |                     dropout_rate=hp.dropout_rate,
 33 |                     activation_fn=tf.nn.relu,
 34 |                     training=training,
 35 |                     scope="C_{}".format(i)); i += 1
 36 |     tensor = conv1d(tensor,
 37 |                     size=1,
 38 |                     rate=1,
 39 |                     dropout_rate=hp.dropout_rate,
 40 |                     training=training,
 41 |                     scope="C_{}".format(i)); i += 1
 42 | 
 43 |     for _ in range(2):
 44 |         for j in range(4):
 45 |             tensor = hc(tensor,
 46 |                             size=3,
 47 |                             rate=3**j,
 48 |                             dropout_rate=hp.dropout_rate,
 49 |                             activation_fn=None,
 50 |                             training=training,
 51 |                             scope="HC_{}".format(i)); i += 1
 52 |     for _ in range(2):
 53 |         tensor = hc(tensor,
 54 |                         size=3,
 55 |                         rate=1,
 56 |                         dropout_rate=hp.dropout_rate,
 57 |                         activation_fn=None,
 58 |                         training=training,
 59 |                         scope="HC_{}".format(i)); i += 1
 60 | 
 61 |     for _ in range(2):
 62 |         tensor = hc(tensor,
 63 |                         size=1,
 64 |                         rate=1,
 65 |                         dropout_rate=hp.dropout_rate,
 66 |                         activation_fn=None,
 67 |                         training=training,
 68 |                         scope="HC_{}".format(i)); i += 1
 69 | 
 70 |     K, V = tf.split(tensor, 2, -1)
 71 |     return K, V
 72 | 
 73 | def AudioEnc(S, training=True):
 74 |     '''
 75 |     Args:
 76 |       S: melspectrogram. (B, T/r, n_mels)
 77 | 
 78 |     Returns
 79 |       Q: Queries. (B, T/r, d)
 80 |     '''
 81 |     i = 1
 82 |     tensor = conv1d(S,
 83 |                     filters=hp.d,
 84 |                     size=1,
 85 |                     rate=1,
 86 |                     padding="CAUSAL",
 87 |                     dropout_rate=hp.dropout_rate,
 88 |                     activation_fn=tf.nn.relu,
 89 |                     training=training,
 90 |                     scope="C_{}".format(i)); i += 1
 91 |     tensor = conv1d(tensor,
 92 |                     size=1,
 93 |                     rate=1,
 94 |                     padding="CAUSAL",
 95 |                     dropout_rate=hp.dropout_rate,
 96 |                     activation_fn=tf.nn.relu,
 97 |                     training=training,
 98 |                     scope="C_{}".format(i)); i += 1
 99 |     tensor = conv1d(tensor,
100 |                     size=1,
101 |                     rate=1,
102 |                     padding="CAUSAL",
103 |                     dropout_rate=hp.dropout_rate,
104 |                     training=training,
105 |                     scope="C_{}".format(i)); i += 1
106 |     for _ in range(2):
107 |         for j in range(4):
108 |             tensor = hc(tensor,
109 |                             size=3,
110 |                             rate=3**j,
111 |                             padding="CAUSAL",
112 |                             dropout_rate=hp.dropout_rate,
113 |                             training=training,
114 |                             scope="HC_{}".format(i)); i += 1
115 |     for _ in range(2):
116 |         tensor = hc(tensor,
117 |                         size=3,
118 |                         rate=3,
119 |                         padding="CAUSAL",
120 |                         dropout_rate=hp.dropout_rate,
121 |                         training=training,
122 |                         scope="HC_{}".format(i)); i += 1
123 | 
124 |     return tensor
125 | 
126 | def Attention(Q, K, V, mononotic_attention=False, prev_max_attentions=None):
127 |     '''
128 |     Args:
129 |       Q: Queries. (B, T/r, d)
130 |       K: Keys. (B, N, d)
131 |       V: Values. (B, N, d)
132 |       mononotic_attention: A boolean. At training, it is False.
133 |       prev_max_attentions: (B,). At training, it is set to None.
134 | 
135 |     Returns:
136 |       R: [Context Vectors; Q]. (B, T/r, 2d)
137 |       alignments: (B, N, T/r)
138 |       max_attentions: (B, T/r)
139 |     '''
140 |     A = tf.matmul(Q, K, transpose_b=True) * tf.rsqrt(tf.to_float(hp.d))
141 |     if mononotic_attention:  # for inference
142 |         key_masks = tf.sequence_mask(prev_max_attentions, hp.max_N)
143 |         reverse_masks = tf.sequence_mask(hp.max_N - hp.attention_win_size - prev_max_attentions, hp.max_N)[:, ::-1]
144 |         masks = tf.logical_or(key_masks, reverse_masks)
145 |         masks = tf.tile(tf.expand_dims(masks, 1), [1, hp.max_T, 1])
146 |         paddings = tf.ones_like(A) * (-2 ** 32 + 1)  # (B, T/r, N)
147 |         A = tf.where(tf.equal(masks, False), A, paddings)
148 |     A = tf.nn.softmax(A) # (B, T/r, N)
149 |     max_attentions = tf.argmax(A, -1)  # (B, T/r)
150 |     R = tf.matmul(A, V)
151 |     R = tf.concat((R, Q), -1)
152 | 
153 |     alignments = tf.transpose(A, [0, 2, 1]) # (B, N, T/r)
154 | 
155 |     return R, alignments, max_attentions
156 | 
157 | def AudioDec(R, training=True):
158 |     '''
159 |     Args:
160 |       R: [Context Vectors; Q]. (B, T/r, 2d)
161 | 
162 |     Returns:
163 |       Y: Melspectrogram predictions. (B, T/r, n_mels)
164 |     '''
165 | 
166 |     i = 1
167 |     tensor = conv1d(R,
168 |                     filters=hp.d,
169 |                     size=1,
170 |                     rate=1,
171 |                     padding="CAUSAL",
172 |                     dropout_rate=hp.dropout_rate,
173 |                     training=training,
174 |                     scope="C_{}".format(i)); i += 1
175 |     for j in range(4):
176 |         tensor = hc(tensor,
177 |                         size=3,
178 |                         rate=3**j,
179 |                         padding="CAUSAL",
180 |                         dropout_rate=hp.dropout_rate,
181 |                         training=training,
182 |                         scope="HC_{}".format(i)); i += 1
183 | 
184 |     for _ in range(2):
185 |         tensor = hc(tensor,
186 |                         size=3,
187 |                         rate=1,
188 |                         padding="CAUSAL",
189 |                         dropout_rate=hp.dropout_rate,
190 |                         training=training,
191 |                         scope="HC_{}".format(i)); i += 1
192 |     for _ in range(3):
193 |         tensor = conv1d(tensor,
194 |                         size=1,
195 |                         rate=1,
196 |                         padding="CAUSAL",
197 |                         dropout_rate=hp.dropout_rate,
198 |                         activation_fn=tf.nn.relu,
199 |                         training=training,
200 |                         scope="C_{}".format(i)); i += 1
201 |     # mel_hats
202 |     logits = conv1d(tensor,
203 |                     filters=hp.n_mels,
204 |                     size=1,
205 |                     rate=1,
206 |                     padding="CAUSAL",
207 |                     dropout_rate=hp.dropout_rate,
208 |                     training=training,
209 |                     scope="C_{}".format(i)); i += 1
210 |     Y = tf.nn.sigmoid(logits) # mel_hats
211 | 
212 |     return logits, Y
213 | 
214 | def SSRN(Y, training=True):
215 |     '''
216 |     Args:
217 |       Y: Melspectrogram Predictions. (B, T/r, n_mels)
218 | 
219 |     Returns:
220 |       Z: Spectrogram Predictions. (B, T, 1+n_fft/2)
221 |     '''
222 | 
223 |     i = 1 # number of layers
224 | 
225 |     # -> (B, T/r, c)
226 |     tensor = conv1d(Y,
227 |                     filters=hp.c,
228 |                     size=1,
229 |                     rate=1,
230 |                     dropout_rate=hp.dropout_rate,
231 |                     training=training,
232 |                     scope="C_{}".format(i)); i += 1
233 |     for j in range(2):
234 |         tensor = hc(tensor,
235 |                       size=3,
236 |                       rate=3**j,
237 |                       dropout_rate=hp.dropout_rate,
238 |                       training=training,
239 |                       scope="HC_{}".format(i)); i += 1
240 |     for _ in range(2):
241 |         # -> (B, T/2, c) -> (B, T, c)
242 |         tensor = conv1d_transpose(tensor,
243 |                                   scope="D_{}".format(i),
244 |                                   dropout_rate=hp.dropout_rate,
245 |                                   training=training,); i += 1
246 |         for j in range(2):
247 |             tensor = hc(tensor,
248 |                             size=3,
249 |                             rate=3**j,
250 |                             dropout_rate=hp.dropout_rate,
251 |                             training=training,
252 |                             scope="HC_{}".format(i)); i += 1
253 |     # -> (B, T, 2*c)
254 |     tensor = conv1d(tensor,
255 |                     filters=2*hp.c,
256 |                     size=1,
257 |                     rate=1,
258 |                     dropout_rate=hp.dropout_rate,
259 |                     training=training,
260 |                     scope="C_{}".format(i)); i += 1
261 |     for _ in range(2):
262 |         tensor = hc(tensor,
263 |                         size=3,
264 |                         rate=1,
265 |                         dropout_rate=hp.dropout_rate,
266 |                         training=training,
267 |                         scope="HC_{}".format(i)); i += 1
268 |     # -> (B, T, 1+n_fft/2)
269 |     tensor = conv1d(tensor,
270 |                     filters=1+hp.n_fft//2,
271 |                     size=1,
272 |                     rate=1,
273 |                     dropout_rate=hp.dropout_rate,
274 |                     training=training,
275 |                     scope="C_{}".format(i)); i += 1
276 | 
277 |     for _ in range(2):
278 |         tensor = conv1d(tensor,
279 |                         size=1,
280 |                         rate=1,
281 |                         dropout_rate=hp.dropout_rate,
282 |                         activation_fn=tf.nn.relu,
283 |                         training=training,
284 |                         scope="C_{}".format(i)); i += 1
285 |     logits = conv1d(tensor,
286 |                size=1,
287 |                rate=1,
288 |                dropout_rate=hp.dropout_rate,
289 |                training=training,
290 |                scope="C_{}".format(i))
291 |     Z = tf.nn.sigmoid(logits)
292 |     return logits, Z
293 | 


--------------------------------------------------------------------------------
/prepo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #/usr/bin/python2
 3 | '''
 4 | By kyubyong park. kbpark.linguist@gmail.com.
 5 | https://www.github.com/kyubyong/multi-speech-corpora/dc_tts
 6 | '''
 7 | 
 8 | from __future__ import print_function
 9 | 
10 | from utils import load_spectrograms
11 | import os
12 | from data_load import load_data
13 | import numpy as np
14 | import tqdm
15 | from hyperparams import Hyperparams as hp
16 | 
17 | # Load data
18 | fpaths, _, _ = load_data() # list
19 | 
20 | for fpath in tqdm.tqdm(fpaths):
21 |     fname, mel, mag = load_spectrograms(fpath)
22 |     if not os.path.exists("{}/mels".format(hp.lang)): os.makedirs("{}/mels".format(hp.lang))
23 |     if not os.path.exists("{}/mags".format(hp.lang)): os.makedirs("{}/mags".format(hp.lang))
24 | 
25 |     np.save("{}/mels/{}".format(hp.lang, fname.replace("wav", "npy")), mel)
26 |     np.save("{}/mags/{}".format(hp.lang, fname.replace("wav", "npy")), mag)


--------------------------------------------------------------------------------
/rulebook.txt:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------------------------
  2 | ## 한국어 발음규칙 (Korean G2P Rulebook)
  3 | ## Last updated: 2017-01-13
  4 | ## Yejin Cho (scarletcho@gmail.com)
  5 | ## 처리순서:
  6 | ##     예외처리 - 유기음화(겹받침) - 겹받침관련규칙 - 경음화 - 겹받침단순화 - 비음화 - 리을 재음절화
  7 | ##     - 유음화 - 구개음화 - 유기음화(홑받침) - 연음 - 종성중화 - 리을 재음절화 [종료]
  8 | ## ----------------------------------------------------------------------------------------
  9 | ## 예외처리
 10 | ii`ll`@y([aeoquv])`	ii`ll`rr`y\1`	# 일 연대, 삼십일여간
 11 | ii`ll`#`@y([aeoquv])`	ii`ll`#`rr`y\1`	# 일 연대, 삼십일여간
 12 | (h0`aa|t0`xx)`ll`#`@ii`ll`	\1`ll`#`rr`ii`ll`	# 들일, 볼일, 할일
 13 | (h0`aa|t0`xx)`ll`@ii`ll`	\1`ll`rr`ii`ll`	# 들일, 볼일, 할일
 14 | s0`vv`ll`@ii`kf`	s0`vv`ll`rr`ii`kf`	# 설익(다)
 15 | mm`uu`ll`@k0`oo`@k0`ii`	mm`uu`ll`kk`oo`k0`ii`	# 물고기
 16 | s0`ii`ll`@s0`ii`ll	s0`ii`ll`s0`ii`ll	# 실실
 17 | k0`ii`@s0`xx`lk`	k0`ii`s0`xx`kf`	# 기슭
 18 | c0`vv`ll`@ya`kf`	c0`vv`rr`ya`kf`	# 절약
 19 | k0`xx`mf`@yo`@ii`ll`	k0`xx`@mm`yo`@ii`ll`	# 금요일
 20 | lt`@ii`	ll`@ch`ii`	# 훑이
 21 | (?<=nn`vv`)lb`(?=@(c0`(uu|vv)`kf|t0`(uu|vv)`ng))	pf`	# 넓죽/넓둥글다
 22 | (?<=s0`ii`)lh`@c0`(?=xx`ng)	ll`cc`	# 싫증
 23 | t0`aa`lk`	t0`aa`kf`	# 닭
 24 | (wq|we|oo)`nf`@k0`aa`c0`	\1`nf`k0`aa`tf`	# 온갖
 25 | mm`aa`tf`@h0`yv`ng`	mm`aa`th`yv`ng`	# 맏형
 26 | k0`vv`th`@oo`s0`	k0`vv`t0`oo`tf`	# 겉옷
 27 | c0`uu`ll`@nn`vv`mf`@k0`ii`	c0`uu`ll`rr`vv`mf`@kk`ii`	# 줄넘기
 28 | h0`oo`th`@ii`@p0`uu`ll`	h0`oo`nf`nn`ii`p0`uu`ll`	# 홑이불
 29 | s0`aa`ks`@ii`ll`	s0`aa`ng`nn`ii`ll`	# 삯일
 30 | mm`qq`nf`@ii`pf`	mm`qq`nf`nn`ii`pf`	# 맨입
 31 | kk`oo`ch`@ii`ph`	kk`oo`nf`nn`ii`pf`	# 꽃잎
 32 | nn`qq`@p0`oo`kf`@ya`kf`	nn`qq`p0`oo`ng`nn`ya`kf`	# 내복약
 33 | h0`aa`nf`@yv`@rr`xx`mf`	h0`aa`nf`nn`yv`rr`xx`mf`	# 한여름
 34 | nn`aa`mf`@c0`oo`nf`@yv`@p0`ii`	nn`aa`mf`c0`oo`nf`nn`yv`p0`ii`	# 남존여비
 35 | s0`ii`nf`@yv`@s0`vv`ng`	s0`ii`nf`nn`yv`s0`vv`ng`	# 신여성
 36 | s0`qq`kf`@yv`nf`@ph`ii`ll`	s0`qq`ng`nn`yv`nf`ph`ii`ll`	# 색연필
 37 | t0`aa`mf`@yo`	t0`aa`mf`nn`yo`	# 담요
 38 | nn`uu`nf`@yo`@k0`ii`	nn`uu`nf`nn`yo`k0`ii`	# 눈요기
 39 | vv`pf`@yo`ng`	vv`mf`nn`yo`ng`	# (영)업용
 40 | s0`ii`kf`@yo`ng`@yu`	s0`ii`k0`yo`ng`nn`yu`	# 식용유
 41 | nf`@yu`nf`@rr`ii`	nf`nn`yu`ll`rr`ii`	# (국민)윤리
 42 | (c0|s0)`(aa|oo|uu)`ll`#`@ii`(ph|p0|pf)`	\1`\2`ll`#`rr`ii`pf`	# 잘입다, 솔잎, 술잎
 43 | (c0|s0)`(aa|oo|uu)`ll`@ii`(ph|p0|pf)`	\1`\2`ll`rr`ii`pf`	# 잘입다, 솔잎, 술잎
 44 | (?<=(^`@))h0`aa`nf`#`@ii`ll`	h0`aa`nf`#`nn`ii`ll`	# 한일
 45 | (?<=(^`@))h0`aa`nf`@ii`ll`	h0`aa`nf`nn`ii`ll`	# 한일
 46 | (?<=(#`@))h0`aa`nf`#`@ii`ll`	h0`aa`nf`#`nn`ii`ll`	# 한일
 47 | (?<=(^`@))mm`aa`kf`@ii`ll`	mm`aa`ng`nn`ii`ll`	# 막일
 48 | (?<=(#`@))mm`aa`kf`@ii`ll`	mm`aa`ng`nn`ii`ll`	# 막일
 49 | mm`oo`ll`@s0`aa`ng`@s0`ii`kf`	mm`oo`ll`ss`aa`ng`s0`ii`kf`	# 몰상식
 50 | oo`s0`#`@ii`pf`	oo`nf`#`@nn`ii`pf`	# 옷입(다)
 51 | (nf|ll)`@yv`@s0`vv`@s0`	\1`nn`yv`s0`vv`tf`	# (스물/서른)여섯
 52 | (ng|mf|nf)`#`@y([aeoquv])`	\1`#`nn`y\2`	# 밤윷, 직행열차, 콩엿, 볶은엿
 53 | (ng|mf|nf)`@y([aeoquv])`	\1`nn`y\2`	# 밤윷, 직행열차, 콩엿, 볶은엿
 54 | (wv|ii)`ll`@y([aeoquv])`	\1`rr`y\2`	# 일/월요일
 55 | ll`#`@y([aeoquv])`	ll`#`rr`y\1`	# 불여우, 물약, 서울역, 물엿, 물옆, 굴옆, 휘발유, 유들유들
 56 | ll`@y([aeoquv])`	ll`rr`y\1`	# 불여우, 물약, 서울역, 물엿, 물옆, 굴옆, 휘발유, 유들유들
 57 | ii`ll`@c0`vv`ll`	ii`ll`cc`vv`ll`	# 일절
 58 | (th|tf|s0)`@y([aeoquv])`	nf`@nn`y\2`	# 쑥갓요
 59 | (<=^|#)mm`aa`kf`@ii`ll	mm`aa`ng`nn`ii`ll	# 막일
 60 | k0`uu`@k0`xx`nf`@rr`yu`	k0`uu`k0`xx`nf`nn`yu`	# 구근류
 61 | k0`aa`ll`@([ct])0`xx`ng`	k0`aa`ll`\1\1`xx`ng`	# 갈등/갈증
 62 | p0`aa`ll`@t0`oo`ng`	p0`aa`ll`tt`oo`ng`	# 발동
 63 | c0`vv`ll`@t0`oo`	c0`vv`ll`tt`oo`	# 절도
 64 | mm`aa`ll`@s0`aa`ll`	mm`aa`ll`ss`aa`ll`	# 말살
 65 | p0`uu`ll`@s0`	p0`uu`ll`ss`	# 불소/불세출
 66 | ii`ll`@s0`ii`	ii`ll`ss`ii`	# 일시
 67 | p0`aa`ll`@c0`vv`nf`	p0`aa`ll`cc`vv`nf`	# 발전
 68 | (?<=(s0`ii`nf`|s0`aa`mf`)@)(c|k|t)0`	\2\2`	# 신고, 신다, 신자, 삼고, 삼다, 삼자
 69 | (?<=k0`ii`mf`@)p0`	pp`	# 김밥
 70 | (?<=t0`vv`@t0`xx`mf`@)c0`	cc`	# 더듬지
 71 | c0`aa`mf`@c0`aa`@rr`ii`	c0`aa`mf`cc`aa`rr`ii`	# 잠자리
 72 | (?<=(ng|ll)`@)c0`(?=uu`ll`@k0`ii)	cc`	# 물줄기, 강줄기
 73 | (?<=(nf|ll)`@)p0`vv`pf`	pp`vv`pf`	# 문법, 불법
 74 | (?<=(nf|tf)`@)p0`(?=aa`@rr`aa`mf)	pp`	# 신바람, 늦바람
 75 | p0`aa`@rr`aa`mf`@k0`yv`ll`	p0`aa`rr`aa`mf`kk`yv`ll`	# 바람결
 76 | (?<=(mf|kf)`@)p0`(?=aa`pf`)	pp`	# 아침밥, 점심밥, 저녁밥
 77 | (?<=nn`uu`nf`@)t0`	tt`	# 눈동자, 눈대중
 78 | mm`aa`kf`@yv`mf`	mm`aa`ng`nn`yv`mf`	# 늑막염, 결막염
 79 | p0`aa`lb`@(t|k)0`	p0`aa`pf`\1\1`	# 밟다, 밟고
 80 | p0`aa`lb`@nn`	p0`aa`mf`nn`	# 밟는
 81 | nn`vv`lb`@(t|k)0`	nn`vv`ll`\1\1`	# 넓다, 넓고
 82 | mm`(aa|vv)`s0`@ii`ss`@t0`aa`	mm`\1`t0`ii`tf`tt`aa`	# 맛있다
 83 | mm`(aa|vv)`s0`@vv`ps`@t0`aa`	mm`\1`t0`vv`pf`tt`aa`	# 맛없다
 84 | c0`vv`c0`@vv`@mm`ii`	c0`vv`t0`vv`mm`ii`	# 젖어미
 85 | h0`vv`s0`@uu`s0`@xx`mf`	h0`vv`t0`uu`s0`xx`mf`	# 헛웃음
 86 | k0`aa`ps`@vv`@ch`ii`	k0`aa`p0`vv`ch`ii`	# 값어치
 87 | k0`aa`ps`@ii`ss`@nn`xx`nf`	k0`aa`p0`ii`nf`nn`xx`nf`	# 값있는
 88 | c0`vv`lm`@c0`ii`	c0`vv`mf`cc`ii`	# 젊지
 89 | oo`lm`@k0`(?=[iy])	oo`mf`k0`	# 옮기(다)
 90 | k0`uu`lm`@k0`ii`@t0`aa`	k0`uu`mf`k0`ii`t0`aa`	# 굶기다
 91 | (nn|k0|h0)`aa`ll`#`@(p|s|c|k|t)0`	\1`aa`ll`#`\2\2`	# 갈바, 할바, 만날것
 92 | (nn|k0|h0)`aa`ll`@(p|s|c|k|t)0`	\1`aa`ll`\2\2`	# 갈바, 할바, 만날것
 93 | ch`vv`s0`@ii`nf`	ch`vv`t0`ii`nf`	# 첫인(상)
 94 | (?<=(mf|nf)`@)ii`@p0`uu`ll`	nn`ii`p0`uu`ll`	# 솜이불
 95 | (?<=(nf|ll)`@)k0`oo`@rr`ii`	kk`oo`rr`ii`	# 문고리
 96 | (?<=(nf|ll)`@)s0`qq`	ss`qq`	# 산새, 들새
 97 | (?<=(nf|ll)`@)c0`qq`@c0`uu`	cc`qq`c0`uu`	# 손재주, 글재주
 98 | k0`ii`ll`@k0`aa`	k0`ii`ll`kk`aa`	# 길가
 99 | mm`uu`ll`@t0`oo`ng`@ii`	mm`uu`ll`tt`oo`ng`ii`	# 물동이
100 | mm`uu`ll`@c0`	mm`uu`ll`@cc`	# 물증
101 | (?<=(nf|ll)`@)p0`aa`@t0`aa`kf`	pp`aa`t0`aa`kf`	# 발바닥, 손바닥
102 | (?<=(nf|ll)`@)s0`oo`kf`	ss`oo`kf`	# 굴속, 물속
103 | (?<=s0`uu`ll`@)(c|p|t)0`	\1\1`	# 술잔, 술독, 술병, 술자리
104 | k0`aa`ng`@k0`aa`	k0`aa`ng`kk`aa`	# 강가
105 | (?<=(ng|mf)`@)t0`aa`ll`	tt`aa`ll`	# 초승달
106 | t0`xx`ng`@p0`uu`ll`	t0`xx`ng`pp`uu`ll`	# 등불
107 | ch`aa`ng`@s0`aa`ll`	ch`aa`ng`ss`aa`ll`	# 창살
108 | (?<=(ll|ng)`@)c0`uu`ll`@k0`ii`	k0`aa`ng`cc`uu`ll`k0`ii`	# 강줄기, 물줄기
109 | aa`nf`@k0`oo`	aa`nf`kk`oo`	# 안고
110 | (?<=kk`yv`@aa`nf`@)(t|c)0`	\1\1`	# 껴안지, 껴안다
111 | ii`@c0`uu`kf`@ii`@c0`uu`kf`	ii`c0`uu`ng`nn`ii`c0`uu`kf`	# 이죽이죽
112 | ya`@k0`xx`mf`@ya`@k0`xx`mf`	ya`k0`xx`mf`nn`ya`k0`xx`mf`	# 야금야금
113 | p0`ee`@k0`qq`s0`@ii`s0`	p0`ee`k0`qq`nf`nn`ii`tf`	# 베갯잇
114 | kk`qq`s0`@ii`ph`	kk`qq`nf`nn`ii`pf`	# 깻잎
115 | nn`aa`@mm`uu`s0`@ii`ph`	nn`aa`mm`uu`nf`nn`ii`pf`	# 나뭇잎
116 | qq`s0`@yv`ll`	qq`nf`nn`yv`ll`	# 도리깻열
117 | t0`wi`s0`@(?=[aeqiouyvwx])	t0`wi`nf`@nn`	# 뒷윷, 뒷얘기
118 | nn`xx`c0`@yv`@rr`xx`mf`	nn`xx`tf`nn`yv`rr`xx`mf`	# 늦여름
119 | t0`ii`@k0`xx`tf`@(ii|xx|ee)`	t0`ii`k0`xx`s0`\1`	# 디귿에, 디귿이
120 | (c0|ch|th|h0)`ii`@xx`(c0|ch|th|h0)`@(ii|xx|ee)`	\1`ii`xx`s0`\3`	# 치읓이, 지읒에
121 | ph`ii`@xx`ph`@(ii|xx|ee)`	ph`ii`xx`p0`\1`	# 피읖에
122 | kh`ii`@xx`kh`@(ii|xx|ee)`	kh`ii`xx`k0`\1`	# 키읔이
123 | ## 유기음화 (겹받침)
124 | l(b|p)`@h0`	ll`@ph`
125 | nh`@(c|k|t)0`	nf`@\1h`
126 | lh`@(c|k|t)0`	ll`@\1h`
127 | lk`@h0`	ll`@kh`
128 | nc`@h0`	nf`@ch`
129 | ## 겹받침 규칙 (ㄹㅎ)
130 | (k0`aa`|k0`uu`|k0`vv`|oo`|p0`aa`|nn`aa`|nn`xx`|p0`uu`|^ii`|@`ii`mm`aa`|mm`uu`|(^|@`)vv`)lk`@(t0|c0|s0)`	\1kf`@\3`
131 | (k0`aa`|k0`uu`|k0`vv`|vv`|oo`|mm`aa`|p0`aa`|nn`aa`|nn`xx`|mm`uu`|p0`uu`|^ii`|@`ii`)lk`@k0`	\1ll`@kk`
132 | ## 겹받침 규칙 (ㄴㅎ)
133 | nh`@(k|t|c)0`	nf`@\1h`
134 | nh`@s0`	nf`@ss`
135 | nh`@nn`	nf`@nn`
136 | nh`@(?=[aeqiouyvwx])	@nn`
137 | ## 겹받침 규칙 (ㄹㅎ)
138 | lh`@nn`	ll`@rr`
139 | lh`@(k|t|c)0`	ll`@\1h`
140 | lh`@s0`	ll`@ss`
141 | lh`@(?=[aeqiouyvwx])	@rr`
142 | ## 겹받침 규칙 (ㄴㅈ)
143 | nc`@([ktsc])0`	nf`@\1\1`
144 | ## 겹받침 규칙 (ㄹㅁ)
145 | (c0`vv`|c0`ii`|k0`uu`|t0`aa`|(^|@`)oo`|k0`oo`)lm`@([ktsc])0`	\1mf`@\3\3`
146 | ## 겹받침 규칙 (ㄹㅂ)
147 | (p0`aa`|tt`vv`|(^|@`)yv`|nn`vv`|(^|@`)ya`|cc`aa`)lb`@([ktsc])0`	\1ll`@\4\4`
148 | ## 겹받침 규칙 (ㄹㅌ)
149 | h0`(aa|uu)`lt`@nn`	h0`\1`ll`@ll`
150 | h0`(aa|uu)`lt`@([ktsc])0`	h0`\1`ll`@\2\2`
151 | ## 경음화
152 | lk`@(c|k|p|s|t)0`	kf`@\1\1`
153 | l(b|p)`@p0`	pf`@pp`
154 | s0`@p0`	tf`@pp`
155 | l(b|t)`@(c|k|s|t|p)0`	ll`@\2\2`
156 | lp`@(c|k|s|t)0`	pf`@\1\1`
157 | (c[h0]|s[s0]|t[fh])`@(c|k|s|t)0`	tf`@\2\2`
158 | k[fhks]`@(c|k|p|s|t)0`	kf`@\1\1`
159 | p[sfh]`@(c|k|p|s|t)0`	pf`@\1\1`
160 | (?<=(kf|kh|ks|ss|c0|ch|tf|th)`@)p0`	pp`
161 | h0`@s0`	@ss`
162 | nh`@s0`	nf`@ss`
163 | lh`@s0`	ll`@ss`
164 | ## 겹받침 단순화: 어말 또는 자음 앞
165 | (ks|lk)`(?=(#|$|@[ptkshcmnr]))	kf`
166 | n[ch]`(?=(#|$|@[ptkshcmnr]))	nf`
167 | l[bsth]`(?=(#|$|@[ptkshcmnr]))	ll`
168 | lm`(?=(#|$|@[ptkshcmnr]))	mf`
169 | (ps|lp)`(?=(#|$|@[ptkshcmnr]))	pf`
170 | ## 겹받침 단순화: 모음 앞
171 | ([kp])s`@(?=[aeqiouyvwx])	\1f`@ss`
172 | ls`@(?=[aeqiouyvwx])	ll`@ss`
173 | nc`@(?=[aeqiouyvwx])	nf`@c0`
174 | lk`@(?=[aeqiouyvwx])	ll`@k0`
175 | lm`@(?=[aeqiouyvwx])	ll`@mm`
176 | lb`@(?=[aeqiouyvwx])	ll`@p0`
177 | l([tp])`@(?=[aeqiouyvwx])	ll`@\1h`
178 | ## 비음화
179 | (?<=[pk])0`@rr`	f`@nn`
180 | (c0|ch|s0|ss|tf|nh|h0)`@nn`	nf`@nn`
181 | nc`@(p|t|k)0`	nf`@\1\1`
182 | nc`(?=@[ptkshcmnr])	nf`
183 | lm`@k0`	mf`@kk`
184 | lm`(?=@[ptkshcmnr])	mf`
185 | k[fhks]`#`(?=@(nn|mm)`)	ng`#`
186 | k[fhks]`(?=@(nn|mm)`)	ng`
187 | lk`(?=@(nn|mm)`)	ng`
188 | p[sfh]`#`(?=@(nn|mm)`)	mf`#`
189 | p[sfh]`(?=@(nn|mm)`)	mf`
190 | l[bp]`(?=@(nn|mm)`)	mf`
191 | (?<=(mf|ng|pf|kf)`@)rr`	nn`
192 | (c0|ch|s0|ss|tf|nh|h0)`(?=(?:#`)?@mm`)	nf`
193 | ## 리을 재음절화
194 | ll`@(?=y)	@rr`
195 | ## 유음화
196 | (nf|ll)`@rr`	ll`@rr`
197 | l[lht]`#`@nn`	ll`#`@rr`
198 | l[lht]`@nn`	ll`@rr`
199 | ## 구개음화
200 | tf`@(?=[iy])	@c0`
201 | th`@(?=[iy])	@ch`
202 | tf`@h0`(?=[iy])	@ch`
203 | ## 유기음화 (홑받침)
204 | (p|k)f`#`@h0`	#`@\1h`
205 | (p|k)f`@h0`	@\1h`
206 | h0`@(c|k|t)0`	@\1h`
207 | (tf|th|s0)`@h0`	@th`
208 | (tf|th|s0)`#`@h0`	#`@th`
209 | ## 연음규칙
210 | (s0|ss|kk|p0|ph|pp|t0|th|tt|c0|ch|kh|kk|k0|mm|nn)`@(?=[aeqiouyvwx])	@\1`
211 | nh`@(?=[aeqiouyvwx])	@nn`
212 | (s0|ss|c0|ch|th)`(?=@[ptkshcmnr])	tf`
213 | h0`@(?=[aeqiouyvwx])	@
214 | lh`@?(?=[aeqiouyvwx])	@rr`
215 | (p|t|k)f`#`@?(?=[aeqiouyvwx])	#`@\g<1>0`
216 | (p|t|k)f`@?(?=[aeqiouyvwx])	@\g<1>0`
217 | (m|n)f`@?(?=[aeqiouyvwx])	@\1\1`
218 | ## 종성규칙
219 | (s0|ss|c0|ch|th)`(?=@|#|$)	tf`
220 | (kh|kk|ks|lk)`(?=@|#|$|[ptkshcmnr])	kf`	# (ks|lk)`(?=@[ptkshcmnr])	kf`
221 | (ph|lp|ps)`(?=@|#|$|[ptkshcmnr])	pf`
222 | (?<=[ptkshcmnr].)`@(?=[aeqiouyvwx])	`
223 | l[bhstp]`(?=@|#|$|[ptkshcmnr])	ll`	# l[bt]`(?=@[ptkshcmnr])	ll`
224 | nh`(?=@|#|$|[ptkshcmnr])	nf`@
225 | ## 리을 재음절화
226 | (?<=[aeqiouyvwx].`)ll`@(?=[aeqiouyvwx])	@rr`
227 | ll`@ll`	ll`@rr`


--------------------------------------------------------------------------------
/synthesize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # /usr/bin/python2
 3 | '''
 4 | By kyubyong park. kbpark.linguist@gmail.com.
 5 | https://www.github.com/kyubyong/multi-speech-corpora/dc_tts
 6 | '''
 7 | 
 8 | from __future__ import print_function
 9 | 
10 | import os
11 | 
12 | from hyperparams import Hyperparams as hp
13 | import numpy as np
14 | import tensorflow as tf
15 | from train import Graph
16 | from utils import *
17 | from data_load import load_data
18 | from scipy.io.wavfile import write
19 | from tqdm import tqdm
20 | 
21 | def synthesize():
22 |     # Load data
23 |     L = load_data("synthesize")
24 | 
25 |     # Load graph
26 |     g = Graph(mode="synthesize"); print("Graph loaded")
27 | 
28 |     with tf.Session() as sess:
29 |         sess.run(tf.global_variables_initializer())
30 | 
31 |         # Restore parameters
32 |         var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'Text2Mel')
33 |         saver1 = tf.train.Saver(var_list=var_list)
34 |         saver1.restore(sess, tf.train.latest_checkpoint(hp.logdir + "-1"))
35 |         print("Text2Mel Restored!")
36 | 
37 |         var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'SSRN') + \
38 |                    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'gs')
39 |         saver2 = tf.train.Saver(var_list=var_list)
40 |         saver2.restore(sess, tf.train.latest_checkpoint(hp.logdir + "-2"))
41 |         print("SSRN Restored!")
42 | 
43 |         # Feed Forward
44 |         ## mel
45 |         Y = np.zeros((len(L), hp.max_T, hp.n_mels), np.float32)
46 |         prev_max_attentions = np.zeros((len(L),), np.int32)
47 |         for j in tqdm(range(hp.max_T)):
48 |             _gs, _Y, _max_attentions, _alignments = \
49 |                 sess.run([g.global_step, g.Y, g.max_attentions, g.alignments],
50 |                          {g.L: L,
51 |                           g.mels: Y,
52 |                           g.prev_max_attentions: prev_max_attentions})
53 |             Y[:, j, :] = _Y[:, j, :]
54 |             prev_max_attentions = _max_attentions[:, j]
55 | 
56 |         # Get magnitude
57 |         Z = sess.run(g.Z, {g.Y: Y})
58 | 
59 |         # Generate wav files
60 |         if not os.path.exists(hp.sampledir): os.makedirs(hp.sampledir)
61 |         for i, mag in enumerate(Z):
62 |             print("Working on file", i+1)
63 |             wav = spectrogram2wav(mag)
64 |             write(hp.sampledir + "/{}.wav".format(i+1), hp.sr, wav)
65 | 
66 | if __name__ == '__main__':
67 |     synthesize()
68 |     print("Done")
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # /usr/bin/python2
  3 | '''
  4 | By kyubyong park. kbpark.linguist@gmail.com. 
  5 | https://www.github.com/kyubyong/multi-speech-corpora/dc_tts
  6 | '''
  7 | 
  8 | from __future__ import print_function
  9 | 
 10 | from tqdm import tqdm
 11 | 
 12 | from data_load import get_batch, load_vocab
 13 | from hyperparams import Hyperparams as hp
 14 | from modules import *
 15 | from networks import TextEnc, AudioEnc, AudioDec, Attention, SSRN
 16 | import tensorflow as tf
 17 | from utils import *
 18 | import sys
 19 | 
 20 | 
 21 | class Graph:
 22 |     def __init__(self, num=1, mode="train"):
 23 |         '''
 24 |         Args:
 25 |           num: Either 1 or 2. 1 for Text2Mel 2 for SSRN.
 26 |           mode: Either "train" or "synthesize".
 27 |         '''
 28 |         # Load vocabulary
 29 |         self.char2idx, self.idx2char = load_vocab()
 30 | 
 31 |         # Set flag
 32 |         training = True if mode=="train" else False
 33 | 
 34 |         # Graph
 35 |         # Data Feeding
 36 |         ## L: Text. (B, N), int32
 37 |         ## mels: Reduced melspectrogram. (B, T/r, n_mels) float32
 38 |         ## mags: Magnitude. (B, T, n_fft//2+1) float32
 39 |         if mode=="train":
 40 |             self.L, self.mels, self.mags, self.fnames, self.num_batch = get_batch()
 41 |             self.prev_max_attentions = tf.ones(shape=(hp.B,), dtype=tf.int32)
 42 |             self.gts = tf.convert_to_tensor(guided_attention())
 43 |         else:  # Synthesize
 44 |             self.L = tf.placeholder(tf.int32, shape=(None, None))
 45 |             self.mels = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels))
 46 |             self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None,))
 47 | 
 48 |         if num==1 or (not training):
 49 |             with tf.variable_scope("Text2Mel"):
 50 |                 # Get S or decoder inputs. (B, T//r, n_mels)
 51 |                 self.S = tf.concat((tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1)
 52 | 
 53 |                 # Networks
 54 |                 with tf.variable_scope("TextEnc"):
 55 |                     self.K, self.V = TextEnc(self.L, training=training)  # (N, Tx, e)
 56 | 
 57 |                 with tf.variable_scope("AudioEnc"):
 58 |                     self.Q = AudioEnc(self.S, training=training)
 59 | 
 60 |                 with tf.variable_scope("Attention"):
 61 |                     # R: (B, T/r, 2d)
 62 |                     # alignments: (B, N, T/r)
 63 |                     # max_attentions: (B,)
 64 |                     self.R, self.alignments, self.max_attentions = Attention(self.Q, self.K, self.V,
 65 |                                                                              mononotic_attention=(not training),
 66 |                                                                              prev_max_attentions=self.prev_max_attentions)
 67 |                 with tf.variable_scope("AudioDec"):
 68 |                     self.Y_logits, self.Y = AudioDec(self.R, training=training) # (B, T/r, n_mels)
 69 |         else:  # num==2 & training. Note that during training,
 70 |             # the ground truth melspectrogram values are fed.
 71 |             with tf.variable_scope("SSRN"):
 72 |                 self.Z_logits, self.Z = SSRN(self.mels, training=training)
 73 | 
 74 |         if not training:
 75 |             # During inference, the predicted melspectrogram values are fed.
 76 |             with tf.variable_scope("SSRN"):
 77 |                 self.Z_logits, self.Z = SSRN(self.Y, training=training)
 78 | 
 79 |         with tf.variable_scope("gs"):
 80 |             self.global_step = tf.Variable(0, name='global_step', trainable=False)
 81 | 
 82 |         if training:
 83 |             if num==1: # Text2Mel
 84 |                 # mel L1 loss
 85 |                 self.loss_mels = tf.reduce_mean(tf.abs(self.Y - self.mels))
 86 | 
 87 |                 # mel binary divergence loss
 88 |                 self.loss_bd1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Y_logits, labels=self.mels))
 89 | 
 90 |                 # guided_attention loss
 91 |                 self.A = tf.pad(self.alignments, [(0, 0), (0, hp.max_N), (0, hp.max_T)], mode="CONSTANT", constant_values=-1.)[:, :hp.max_N, :hp.max_T]
 92 |                 self.attention_masks = tf.to_float(tf.not_equal(self.A, -1))
 93 |                 self.loss_att = tf.reduce_sum(tf.abs(self.A * self.gts) * self.attention_masks)
 94 |                 self.mask_sum = tf.reduce_sum(self.attention_masks)
 95 |                 self.loss_att /= self.mask_sum
 96 | 
 97 |                 # total loss
 98 |                 self.loss = self.loss_mels + self.loss_bd1 + self.loss_att
 99 | 
100 |                 tf.summary.scalar('train/loss_mels', self.loss_mels)
101 |                 tf.summary.scalar('train/loss_bd1', self.loss_bd1)
102 |                 tf.summary.scalar('train/loss_att', self.loss_att)
103 |                 tf.summary.image('train/mel_gt', tf.expand_dims(tf.transpose(self.mels[:1], [0, 2, 1]), -1))
104 |                 tf.summary.image('train/mel_hat', tf.expand_dims(tf.transpose(self.Y[:1], [0, 2, 1]), -1))
105 |             else: # SSRN
106 |                 # mag L1 loss
107 |                 self.loss_mags = tf.reduce_mean(tf.abs(self.Z - self.mags))
108 | 
109 |                 # mag binary divergence loss
110 |                 self.loss_bd2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.Z_logits, labels=self.mags))
111 | 
112 |                 # total loss
113 |                 self.loss = self.loss_mags + self.loss_bd2
114 | 
115 |                 tf.summary.scalar('train/loss_mags', self.loss_mags)
116 |                 tf.summary.scalar('train/loss_bd2', self.loss_bd2)
117 |                 tf.summary.image('train/mag_gt', tf.expand_dims(tf.transpose(self.mags[:1], [0, 2, 1]), -1))
118 |                 tf.summary.image('train/mag_hat', tf.expand_dims(tf.transpose(self.Z[:1], [0, 2, 1]), -1))
119 | 
120 |             # Training Scheme
121 |             self.lr = learning_rate_decay(hp.lr, self.global_step)
122 |             self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
123 |             tf.summary.scalar("lr", self.lr)
124 | 
125 |             ## gradient clipping
126 |             self.gvs = self.optimizer.compute_gradients(self.loss)
127 |             self.clipped = []
128 |             for grad, var in self.gvs:
129 |                 grad = tf.clip_by_value(grad, -1., 1.)
130 |                 self.clipped.append((grad, var))
131 |                 self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)
132 | 
133 |             # Summary
134 |             self.merged = tf.summary.merge_all()
135 | 
136 | 
137 | if __name__ == '__main__':
138 |     # argument: 1 or 2. 1 for Text2mel, 2 for SSRN.
139 |     num = int(sys.argv[1])
140 | 
141 |     g = Graph(num=num); print("Training Graph loaded")
142 | 
143 |     logdir = hp.logdir + "-" + str(num)
144 |     sv = tf.train.Supervisor(logdir=logdir, save_model_secs=0, global_step=g.global_step)
145 |     with sv.managed_session() as sess:
146 |         while 1:
147 |             for _ in tqdm(range(g.num_batch), total=g.num_batch, ncols=70, leave=False, unit='b'):
148 |                 gs, _ = sess.run([g.global_step, g.train_op])
149 | 
150 |                 # Write checkpoint files at every 1k steps
151 |                 if gs % 1000 == 0:
152 |                     sv.saver.save(sess, logdir + '/model_gs_{}'.format(str(gs // 1000).zfill(3) + "k"))
153 | 
154 |                     if num==1:
155 |                         # plot alignment
156 |                         alignments = sess.run(g.alignments)
157 |                         plot_alignment(alignments[0], str(gs // 1000).zfill(3) + "k", logdir)
158 | 
159 |             # break
160 |             if gs > hp.num_iterations: break
161 | 
162 |     print("Done")
163 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #/usr/bin/python2
  3 | '''
  4 | By kyubyong park. kbpark.linguist@gmail.com. 
  5 | https://www.github.com/kyubyong/kss
  6 | '''
  7 | from __future__ import print_function, division
  8 | 
  9 | import numpy as np
 10 | import librosa
 11 | import os, copy
 12 | import matplotlib
 13 | matplotlib.use('pdf')
 14 | import matplotlib.pyplot as plt
 15 | from scipy import signal
 16 | 
 17 | from hyperparams import Hyperparams as hp
 18 | import tensorflow as tf
 19 | 
 20 | def get_spectrograms(fpath):
 21 |     '''Parse the wave file in `fpath` and
 22 |     Returns normalized melspectrogram and linear spectrogram.
 23 | 
 24 |     Args:
 25 |       fpath: A string. The full path of a sound file.
 26 | 
 27 |     Returns:
 28 |       mel: A 2d array of shape (T, n_mels) and dtype of float32.
 29 |       mag: A 2d array of shape (T, 1+n_fft/2) and dtype of float32.
 30 |     '''
 31 |     # Loading sound file
 32 |     y, sr = librosa.load(fpath, sr=hp.sr)
 33 | 
 34 |     # Trimming
 35 |     y, _ = librosa.effects.trim(y, top_db=40)
 36 | 
 37 |     # Preemphasis
 38 |     y = np.append(y[0], y[1:] - hp.preemphasis * y[:-1])
 39 | 
 40 |     # stft
 41 |     linear = librosa.stft(y=y,
 42 |                           n_fft=hp.n_fft,
 43 |                           hop_length=hp.hop_length,
 44 |                           win_length=hp.win_length)
 45 | 
 46 |     # magnitude spectrogram
 47 |     mag = np.abs(linear)  # (1+n_fft//2, T)
 48 | 
 49 |     # mel spectrogram
 50 |     mel_basis = librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels)  # (n_mels, 1+n_fft//2)
 51 |     mel = np.dot(mel_basis, mag)  # (n_mels, t)
 52 | 
 53 |     # to decibel
 54 |     mel = 20 * np.log10(np.maximum(1e-5, mel))
 55 |     mag = 20 * np.log10(np.maximum(1e-5, mag))
 56 | 
 57 |     # normalize
 58 |     mel = np.clip((mel - hp.ref_db + hp.max_db) / hp.max_db, 1e-8, 1)
 59 |     mag = np.clip((mag - hp.ref_db + hp.max_db) / hp.max_db, 1e-8, 1)
 60 | 
 61 |     # Transpose
 62 |     mel = mel.T.astype(np.float32)  # (T, n_mels)
 63 |     mag = mag.T.astype(np.float32)  # (T, 1+n_fft//2)
 64 | 
 65 |     return mel, mag
 66 | 
 67 | def spectrogram2wav(mag):
 68 |     '''# Generate wave file from linear magnitude spectrogram
 69 | 
 70 |     Args:
 71 |       mag: A numpy array of (T, 1+n_fft//2)
 72 | 
 73 |     Returns:
 74 |       wav: A 1-D numpy array.
 75 |     '''
 76 |     # transpose
 77 |     mag = mag.T
 78 | 
 79 |     # de-noramlize
 80 |     mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db
 81 | 
 82 |     # to amplitude
 83 |     mag = np.power(10.0, mag * 0.05)
 84 | 
 85 |     # wav reconstruction
 86 |     wav = griffin_lim(mag**hp.power)
 87 | 
 88 |     # de-preemphasis
 89 |     wav = signal.lfilter([1], [1, -hp.preemphasis], wav)
 90 | 
 91 |     # trim
 92 |     wav = trim(wav)
 93 | 
 94 |     return wav.astype(np.float32)
 95 | 
 96 | def griffin_lim(spectrogram):
 97 |     '''Applies Griffin-Lim's raw.'''
 98 |     X_best = copy.deepcopy(spectrogram)
 99 |     for i in range(hp.n_iter):
100 |         X_t = invert_spectrogram(X_best)
101 |         est = librosa.stft(X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)
102 |         phase = est / np.maximum(1e-8, np.abs(est))
103 |         X_best = spectrogram * phase
104 |     X_t = invert_spectrogram(X_best)
105 |     y = np.real(X_t)
106 | 
107 |     return y
108 | 
109 | def invert_spectrogram(spectrogram):
110 |     '''Applies inverse fft.
111 |     Args:
112 |       spectrogram: [1+n_fft//2, t]
113 |     '''
114 |     return librosa.istft(spectrogram, hp.hop_length, win_length=hp.win_length, window="hann")
115 | 
116 | def plot_alignment(alignment, gs, dir=hp.logdir):
117 |     """Plots the alignment.
118 | 
119 |     Args:
120 |       alignment: A numpy array with shape of (encoder_steps, decoder_steps)
121 |       gs: (int) global step.
122 |       dir: Output path.
123 |     """
124 |     if not os.path.exists(dir): os.mkdir(dir)
125 | 
126 |     fig, ax = plt.subplots()
127 |     im = ax.imshow(alignment)
128 | 
129 |     fig.colorbar(im)
130 |     plt.title('{} Steps'.format(gs))
131 |     plt.savefig('{}/alignment_{}.png'.format(dir, gs), format='png')
132 | 
133 | def guided_attention(g=0.2):
134 |     '''Guided attention. Refer to page 3 on the paper.'''
135 |     W = np.zeros((hp.max_N, hp.max_T), dtype=np.float32)
136 |     for n_pos in range(W.shape[0]):
137 |         for t_pos in range(W.shape[1]):
138 |             W[n_pos, t_pos] = 1 - np.exp(-(t_pos / float(hp.max_T) - n_pos / float(hp.max_N)) ** 2 / (2 * g * g))
139 |     return W
140 | 
141 | def learning_rate_decay(init_lr, global_step, warmup_steps = 4000.0):
142 |     '''Noam scheme from tensor2tensor'''
143 |     step = tf.to_float(global_step + 1)
144 |     return init_lr * warmup_steps**0.5 * tf.minimum(step * warmup_steps**-1.5, step**-0.5)
145 | 
146 | def load_spectrograms(fpath):
147 |     '''Read the wave file in `fpath`
148 |     and extracts spectrograms'''
149 | 
150 |     fname = os.path.basename(fpath)
151 |     mel, mag = get_spectrograms(fpath)
152 |     t = mel.shape[0]
153 | 
154 |     # Marginal padding for reduction shape sync.
155 |     num_paddings = hp.r - (t % hp.r) if t % hp.r != 0 else 0
156 |     mel = np.pad(mel, [[0, num_paddings], [0, 0]], mode="constant")
157 |     mag = np.pad(mag, [[0, num_paddings], [0, 0]], mode="constant")
158 | 
159 |     # Reduction
160 |     mel = mel[::hp.r, :]
161 |     return fname, mel, mag
162 | 
163 | #This is adapted by
164 | # https://github.com/keithito/tacotron/blob/master/util/audio.py#L55-62
165 | def trim(wav, top_db=40, min_silence_sec=0.8):
166 |     frame_length = int(hp.sr * min_silence_sec)
167 |     hop_length = int(frame_length / 4)
168 |     endpoint = librosa.effects.split(wav, frame_length=frame_length,
169 |                                hop_length=hop_length,
170 |                                top_db=top_db)[0, 1]
171 |     return wav[:endpoint]
172 | 
173 | def load_j2hcj():
174 |     '''
175 |     Arg:
176 |       jamo: A Hangul Jamo character(0x01100-0x011FF)
177 | 
178 |     Returns:
179 |       A dictionary that converts jamo into Hangul Compatibility Jamo(0x03130 - 0x0318F) Character
180 |     '''
181 |     jamo = u'''␀␃ !,.?ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆴᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ'''
182 |     hcj  = u'''␀␃ !,.?ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄾㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ'''
183 | 
184 |     assert len(jamo) == len(hcj)
185 |     j2hcj = {j: h for j, h in zip(jamo, hcj)}
186 |     return j2hcj
187 | 
188 | def load_j2sj():
189 |     '''
190 |     Arg:
191 |       jamo: A Hangul Jamo character(0x01100-0x011FF)
192 | 
193 |     Returns:
194 |       A dictionary that decomposes double consonants into two single consonants.
195 |     '''
196 |     jamo = u'''␀␃ !,.?ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆴᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ'''
197 |     sj = u'''␀|␃| |!|,|.|?|ᄀ|ᄀᄀ|ᄂ|ᄃ|ᄃᄃ|ᄅ|ᄆ|ᄇ|ᄇᄇ|ᄉ|ᄉᄉ|ᄋ|ᄌ|ᄌᄌ|ᄎ|ᄏ|ᄐ|ᄑ|ᄒ|ᅡ|ᅢ|ᅣ|ᅤ|ᅥ|ᅦ|ᅧ|ᅨ|ᅩ|ᅪ|ᅫ|ᅬ|ᅭ|ᅮ|ᅯ|ᅰ|ᅱ|ᅲ|ᅳ|ᅴ|ᅵ|ᆨ|ᆨᆨ|ᆨᆺ|ᆫ|ᆫᆽ|ᆫᇂ|ᆮ|ᆯ|ᆯᆨ|ᆯᆷ|ᆯᆸ|ᆯᇀ|ᆯᇂ|ᆷ|ᆸ|ᆸᆺ|ᆺ|ᆺᆺ|ᆼ|ᆽ|ᆾ|ᆿ|ᇀ|ᇁ|ᇂ'''
198 | 
199 |     assert len(jamo)==len(sj.split("|"))
200 |     j2sj = {j: s for j, s in zip(jamo, sj.split("|"))}
201 |     return j2sj
202 | 
203 | def load_j2shcj():
204 |     '''
205 |     Arg:
206 |       jamo: A Hangul Jamo character(0x01100-0x011FF)
207 | 
208 |     Returns:
209 |       A dictionary that converts jamo into Hangul Compatibility Jamo(0x03130 - 0x0318F) Character.
210 |       Double consonants are further decomposed into single consonants.
211 |     '''
212 |     jamo = u'''␀␃ !,.?ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆴᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ'''
213 |     shcj = u'''␀|␃| |!|,|.|?|ㄱ|ㄱㄱ|ㄴ|ㄷ|ㄷㄷ|ㄹ|ㅁ|ㅂ|ㅂㅂ|ㅅ|ㅅㅅ|ㅇ|ㅈ|ㅈㅈ|ㅊ|ㅋ|ㅌ|ㅍ|ㅎ|ㅏ|ㅐ|ㅑ|ㅒ|ㅓ|ㅔ|ㅕ|ㅖ|ㅗ|ㅘ|ㅙ|ㅚ|ㅛ|ㅜ|ㅝ|ㅞ|ㅟ|ㅠ|ㅡ|ㅢ|ㅣ|ㄱ|ㄱㄱ|ㄱㅅ|ㄴ|ㄴㅈ|ㄴㅎ|ㄷ|ㄹ|ㄹㄱ|ㄹㅁ|ㄹㅂ|ㄹㅌ|ㄹㅎ|ㅁ|ㅂ|ㅂㅅ|ㅅ|ㅅㅅ|ㅇ|ㅈ|ㅊ|ㅋ|ㅌ|ㅍ|ㅎ'''
214 | 
215 |     assert len(jamo)==len(shcj.split("|"))
216 |     j2shcj = {j: s for j, s in zip(jamo, shcj.split("|"))}
217 |     return j2shcj
218 | 
219 | # def jamo2syl(jamoset):
220 | #     """Inspired by Jamo.
221 | #     Return the Hangul character for the given jamo characters.
222 | #     """
223 | #     _JAMO_LEAD_OFFSET = 0x10ff
224 | #     _JAMO_VOWEL_OFFSET = 0x1160
225 | #     _JAMO_TAIL_OFFSET = 0x11a7
226 | #     _JAMO_OFFSET = 44032
227 | #     assert len(jamoset) in (2, 3), "CANNOT compose a syllable!"
228 | #
229 | #     onset = ord(jamoset[0]) - _JAMO_LEAD_OFFSET
230 | #     vowel = ord(jamoset[1]) - _JAMO_VOWEL_OFFSET
231 | #     coda = ord(jamoset[2]) - _JAMO_TAIL_OFFSET if len(jamoset)==3 else 0
232 | #     return unichr(coda + (vowel - 1) * 28 + (onset - 1) * 588 + _JAMO_OFFSET)
233 | #
234 | # def compose(jamotext):
235 | #     converted, jamoset = "", ""
236 | #     for char in jamotext:
237 | #         codepoint = ord(char)
238 | #         if 4352 <= codepoint <= 4447:  # Hangul Onset
239 | #             if jamoset:
240 | #                 converted += jamo2syl(jamoset)
241 | #             jamoset = char
242 | #         elif 4448 <= codepoint <= 4519: # Hangul Nuclues
243 | #             jamoset += char
244 | #         elif 4520 <= codepoint <= 4607: # Hangul Coda
245 | #             jamoset += char
246 | #             converted += jamo2syl(jamoset)
247 | #             jamoset = ""
248 | #         else: # Non Hangul
249 | #             if jamoset:
250 | #                 converted += jamo2syl(jamoset)
251 | #                 jamoset = ""
252 | #             converted += char
253 | #     return converted
254 | 


--------------------------------------------------------------------------------