├── CMUDict.py
├── KORDict.py
├── LICENSE
├── README.md
├── build_vocab
    ├── build_vocab.py
    └── data_utils.py
├── main.py
├── model.py
└── utils.py


/CMUDict.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import torchtext.data as data
 4 | 
 5 | class CMUDict(data.Dataset):
 6 |     def __init__(self, data_lines, g_field, p_field):
 7 |         fields = [('grapheme', g_field), ('phoneme', p_field)]
 8 |         examples = []  # maybe ignore '...-1' grapheme
 9 |         for line in data_lines:
10 |             grapheme, phoneme = line.split(maxsplit=1)
11 |             examples.append(data.Example.fromlist([grapheme, phoneme],
12 |                                                   fields))
13 |         self.sort_key = lambda x: len(x.grapheme)
14 |         super(CMUDict, self).__init__(examples, fields)
15 | 
16 |     @classmethod
17 |     def splits(cls, path, g_field, p_field, seed=None):
18 |         import random
19 | 
20 |         if seed is not None:
21 |             random.seed(seed)
22 |         with open(path) as f:
23 |             lines = f.readlines()
24 |         random.shuffle(lines)
25 |         train_lines, val_lines, test_lines = [], [], []
26 |         for i, line in enumerate(lines):
27 |             if i % 20 == 0:
28 |                 val_lines.append(line)
29 |             elif i % 20 < 3:
30 |                 test_lines.append(line)
31 |             else:
32 |                 train_lines.append(line)
33 |         train_data = cls(train_lines, g_field, p_field)
34 |         val_data = cls(val_lines, g_field, p_field)
35 |         test_data = cls(test_lines, g_field, p_field)
36 |         return (train_data, val_data, test_data)
37 | 
38 | 


--------------------------------------------------------------------------------
/KORDict.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import torchtext.data as data
 4 | import os
 5 | import random
 6 | 
 7 | class KORDict(data.Dataset):
 8 |     def __init__(self, data_lines, g_field, p_field):
 9 |         fields = [('grapheme', g_field), ('phoneme', p_field)]
10 |         examples = []  # maybe ignore '...-1' grapheme
11 |         for line in data_lines:
12 |             grapheme, phoneme = line.strip().split(maxsplit=1)
13 |             examples.append(data.Example.fromlist([grapheme, phoneme],
14 |                                                   fields))
15 |         self.sort_key = lambda x: len(x.grapheme)
16 |         super(KORDict, self).__init__(examples, fields)
17 | 
18 |     @classmethod
19 |     def splits(cls, path,train,valid,test, g_field, p_field, seed=None):
20 | 
21 |         train_path = os.path.join(path, train)
22 | 
23 |         test_path = os.path.join(path, test)
24 | 
25 | 
26 |         if seed is not None:
27 |             random.seed(seed)
28 |         with open(train_path,encoding='utf-8') as f:
29 |             lines = f.readlines()
30 |         random.shuffle(lines)
31 |         train_lines, val_lines, test_lines = [], [], []
32 | 
33 |         if valid:
34 |             valid_path = os.path.join(path, valid)
35 |             if seed is not None:
36 |                 random.seed(seed)
37 |             with open(valid_path,encoding='utf-8') as f:
38 |                 v_lines = f.readlines()
39 |             random.shuffle(v_lines)
40 |             for line in v_lines:
41 |                 val_lines.append(line)
42 | 
43 |         if test_path:
44 |             test_path = os.path.join(path, test)
45 |             if seed is not None:
46 |                 random.seed(seed)
47 |             with open(test_path,encoding='utf-8') as f:
48 |                 v_lines = f.readlines()
49 |             random.shuffle(v_lines)
50 |             for line in v_lines:
51 |                 test_lines.append(line)
52 | 
53 | 
54 |         for i, line in enumerate(lines):
55 |             if i % 20 == 0 and not valid:
56 |                 val_lines.append(line)
57 |             elif i % 20 < 3 and not test:
58 |                 test_lines.append(line)
59 |             else:
60 |                 train_lines.append(line)
61 |         train_data = cls(train_lines, g_field, p_field)
62 |         val_data = cls(val_lines, g_field, p_field)
63 |         test_data = cls(test_lines, g_field, p_field)
64 |         return (train_data, val_data, test_data)
65 | 
66 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # (semi) Grapheme-to-Phoneme (G2P) - seq2seq model using PyTorch for Korean
 2 | 한국어 문자를 위한 G2P seq2seq 알고리즘 구현 코드의 한국어용 버전입니다. 기존 코드를 아주 minor하게 개선하여 살짝 수정한 수준입니다.
 3 | 
 4 | 이 코드의 목적은 기존의 G2P seq2seq 모델을 이용해서,
 5 | 한국어 자,모음 기준으로 표준어 형태의 문자열을 발음 나는 문자열로 변환 혹은 반대로 발음 문자열을 표준어 형태의 문자열로 변환하는 데에 있습니다. 
 6 | 이것은 캐릭터 레벨의 CTC를 학습하기 위해, 문자열을 발음 형태로 변환하거나 혹은 언어모델이 결합되지 않은 CTC 디코딩의 결과물을 문자열로 변환하는 용도로 고려되었습니다.
 7 | 정확하게는 G2P라 할수 없지만 표음문자인 한국어 특성상 semi G2P라고 표현하였습니다. 
 8 | 
 9 | 또한 이 코드는 코드 원래의 목적대로 DB만 있다면 IPA 형식의 G2P로 사용하는 것에도 크게 문제가 없을 것으로 예측 됩니다.(계획 중입니다)
10 | 
11 | 이러한 코드가 같은 분야를 연구하는 분들에게 도움이 될 수 있었으면 합니다.
12 |   
13 | 
14 | ## Credits
15 | Original paper:
16 | - Luong's paper:
17 | ```
18 | @article{
19 |   author    = {Minh-Thang Luong, Hieu Pham and Christopher D. Manning},
20 |   title     = {Effective Approaches to Attention-based Neural Machine Translation},
21 |   journal   = {CoRR},
22 |   volume    = {abs/1508.04025},
23 |   year      = {2015},
24 | }
25 | ```
26 | - Kaisheng's paper:
27 | ```
28 | @article{
29 |   author    = {Kaisheng Yao and Geoffrey Zweig},
30 |   title     = {Sequence-to-Sequence Neural Net Models for Grapheme-to-Phoneme Conversion},
31 |   journal   = {CoRR},
32 |   volume    = {abs/1506.00196},
33 |   year      = {2015},
34 | }
35 | ```
36 | 
37 | This project also uses parts of the following code:
38 | - [fehiepsi's git repository](https://fehiepsi.github.io/blog/grapheme-to-phoneme/)
39 |    - Baseline code
40 |    - [Beamsearch](https://github.com/MaximumEntropy/Seq2Seq-PyTorch/)
41 | - [cmusphinx git repository](https://github.com/cmusphinx/g2p-seq2seq)
42 |    - Build vocab utils
43 |    
44 | ## Requirements
45 | - Python (3.6 maybe 3.5 이상이면 가능할 듯)
46 | - NumPy
47 | - Pytorch(0.2) and torchtext 
48 | - [python-Levenshtein](https://github.com/ztane/python-Levenshtein/)
49 | - [hangul-utils](https://github.com/kaniblu/hangul-utils/)
50 | 
51 | ## Features
52 | - [x] 표준 문장열 --> 발음 문장열
53 | - [x] 발음 문장열 --> 표준 문장열(위의 모델과 인풋만 반대로 넣어주면 가능하며, build_vocab 에서 생성 가능합니다.)
54 | - [ ] 한국어 G2P (IPA 형식)
55 | 
56 | ## Usage
57 | ### Preparation:
58 | 1) 학습 데이터를 준비합니다 형식은 다음과 같습니다.
59 | ```
60 | train_text.txt
61 | 무슨 일이 있어야 할까
62 | ...
63 | train_trans.txt
64 | 무슨 이리 이써야 할까 
65 | ...
66 | ```
67 | 2) build_vocab을 실행 합니다.
68 | ```
69 | python build_vocab.py --model_dir="../prepared_data/" --train_file="../naive_data/train"
70 | ```
71 | 3) main.py를 실행합니다.
72 | ```
73 | 예시 : 발음 문자열 to 표준 문자열 
74 | > 바라보앋따 (입력)
75 | = 바라보았다 (정답)
76 | < 바라보았다 (출력)
77 | 반대의 케이스도 입력만 조정해주면 가능하며, 또한 Beam search를 조정함으로써 후보군을 여러 개 참조하도록 변환 가능합니다(문자열에 대한 여러 개의 발음열 필요시).
78 | ```
79 | 
80 | ## Authors
81 | qqueing@gmail.com( or kindsinu@naver.com)
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/build_vocab/build_vocab.py:
--------------------------------------------------------------------------------
 1 | """Standalone script to generate word vocabularies from monolingual corpus."""
 2 | 
 3 | import argparse
 4 | 
 5 | import data_utils
 6 | 
 7 | 
 8 | def main():
 9 |   parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
10 |   parser.add_argument(
11 |       "--model_dir", default="../prepared_data/", help="Data for training")
12 |   parser.add_argument(
13 |       "--train_file", default="../naive_data/train", help="Data for training")
14 |   parser.add_argument(
15 |       "--valid_file", default="", help="Data for validation")
16 |   parser.add_argument(
17 |       "--test_file", default="", help="Data for test")
18 | 
19 |   args = parser.parse_args()
20 | 
21 |   data_utils.prepare_g2p_from_naive_data(args.model_dir,args.train_file, args.valid_file,args.test_file)
22 | 
23 | if __name__ == "__main__":
24 |   main()
25 | 


--------------------------------------------------------------------------------
/build_vocab/data_utils.py:
--------------------------------------------------------------------------------
  1 | #-*- coding: utf-8 -*-
  2 | # Copyright 2016 AC Technologies LLC. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ==============================================================================
 16 | 
 17 | """Utilities for tokenizing, creation vocabularies."""
 18 | from __future__ import absolute_import
 19 | from __future__ import division
 20 | from __future__ import print_function
 21 | 
 22 | import os
 23 | import codecs
 24 | 
 25 | # Special vocabulary symbols - we always put them at the start.
 26 | _PAD = "_PAD"
 27 | _GO = "_GO"
 28 | _EOS = "_EOS"
 29 | _UNK = "_UNK"
 30 | _START_VOCAB = [_PAD, _GO, _EOS, _UNK]
 31 | 
 32 | 
 33 | def create_vocabulary(data):
 34 |   """Create vocabulary from input data.
 35 |   Input data is assumed to contain one word per line.
 36 | 
 37 |   Args:
 38 |     data: word list that will be used to create vocabulary.
 39 | 
 40 |   Rerurn:
 41 |     vocab: vocabulary dictionary. In this dictionary keys are symbols
 42 |            and values are their indexes.
 43 |   """
 44 |   vocab = {}
 45 |   for line in data:
 46 |     for item in line:
 47 |       if item in vocab:
 48 |         vocab[item] += 1
 49 |       else:
 50 |         vocab[item] = 1
 51 |   vocab_list = _START_VOCAB + sorted(vocab)
 52 |   vocab = dict([(x, y) for (y, x) in enumerate(vocab_list)])
 53 |   return vocab
 54 | 
 55 | def create_word(data):
 56 |   """Create word from input data.
 57 |   Input data is assumed to contain one word per line.
 58 | 
 59 |   Args:
 60 |     data: word list that will be used to create vocabulary.
 61 | 
 62 |   Rerurn:
 63 |     vocab: vocabulary dictionary. In this dictionary keys are symbols
 64 |            and values are their indexes.
 65 |   """
 66 |   vocab = {}
 67 |   for line in data:
 68 | 
 69 |     if ''.join(line) in vocab:
 70 |       vocab[''.join(line)] += 1
 71 |     else:
 72 |       vocab[''.join(line)] = 1
 73 |   vocab_list = _START_VOCAB + sorted(vocab)
 74 |   vocab = dict([(x, y) for (y, x) in enumerate(vocab_list)])
 75 |   return vocab
 76 | 
 77 | 
 78 | def save_vocabulary(vocab, vocabulary_path):
 79 |   """Save vocabulary file in vocabulary_path.
 80 |   We write vocabulary to vocabulary_path in a one-token-per-line format,
 81 |   so that later token in the first line gets id=0, second line gets id=1,
 82 |   and so on.
 83 | 
 84 |   Args:
 85 |     vocab: vocabulary dictionary.
 86 |     vocabulary_path: path where the vocabulary will be created.
 87 | 
 88 |   """
 89 |   print("Creating vocabulary %s" % (vocabulary_path))
 90 |   with codecs.open(vocabulary_path, "w", "utf-8") as vocab_file:
 91 |     for symbol in sorted(vocab, key=vocab.get):
 92 |       vocab_file.write(symbol + '\n')
 93 | 
 94 | def save_paired(vocab,vocab_paired, vocabulary_path):
 95 |   """Save vocabulary paired file in vocabulary_path.
 96 |   We write vocabulary to vocabulary_path in a one-token-per-line format,
 97 |   so that later token in the first line gets id=0, second line gets id=1,
 98 |   and so on.
 99 | 
100 |   Args:
101 |     vocab: vocabulary dictionary.
102 |     vocab_paired: vocabulary dictionary paired.
103 |     vocabulary_path: path where the vocabulary will be created.
104 | 
105 |   """
106 |   print("Creating vocabulary %s" % (vocabulary_path))
107 |   with codecs.open(vocabulary_path, "w", "utf-8") as vocab_file:
108 |     for vocab1,vocab2 in zip(vocab,vocab_paired):
109 |       vocab_file.write(''.join(vocab1)+' '+''.join(vocab2) + '\n')
110 | 
111 | 
def split_to_grapheme_phoneme(inp_dictionary):
  """Separate dictionary lines into parallel grapheme and phoneme lists.

  Each line is split on whitespace. The first token is broken into its
  individual characters (graphemes); the remaining tokens are the
  phonemes. Lines with fewer than two tokens are skipped.

  Args:
    inp_dictionary: iterable of lines of the form "<word> <ph1> <ph2> ...".

  Returns:
    Tuple (graphemes, phonemes) of two equally long lists of lists.
  """
  rows = (entry.strip().split() for entry in inp_dictionary)
  pairs = [(list(row[0]), row[1:]) for row in rows if len(row) > 1]
  graphemes = [grapheme for grapheme, _ in pairs]
  phonemes = [phoneme for _, phoneme in pairs]
  return graphemes, phonemes
125 | 
126 | 
def collect_pronunciations(dic_lines):
  '''Group the distinct pronunciations of every word.

  Each line is split on whitespace; the first token is the word and the
  remaining tokens, re-joined with single spaces, are one pronunciation.
  A line holding only a word triggers a warning and is ignored; blank
  lines are ignored silently.

  Args:
    dic_lines: iterable of lines of the form "<word> <ph1> <ph2> ...".

  Returns:
    Dictionary mapping each word to the list of its distinct
    pronunciations, in order of first appearance.
  '''
  pronunciations = {}
  for raw in dic_lines:
    tokens = raw.strip().split()
    if not tokens:
      continue
    word = tokens[0]
    if len(tokens) == 1:
      print("WARNING: No phonemes for word '%s' line ignored" % (word))
      continue
    spoken = " ".join(tokens[1:])
    known = pronunciations.setdefault(word, [])
    if spoken not in known:
      known.append(spoken)
  return pronunciations
142 | 
143 | 
def _read_word_pairs(file_prefix):
  """Read <file_prefix>_text.txt / _trans.txt and pair them word by word.

  Lines of both files are matched by position, and within a line the
  whitespace-separated words are matched by position. Each pair becomes
  one entry "<text word> <c1> <c2> ...", where c1, c2, ... are the
  individual characters of the transcribed word.
  """
  with codecs.open(file_prefix + "_text.txt", "r", "utf-8") as f:
    texts = f.readlines()
  with codecs.open(file_prefix + "_trans.txt", "r", "utf-8") as f:
    trans = f.readlines()
  entries = []
  for text, tran in zip(texts, trans):
    for text_word, tran_word in zip(text.split(), tran.split()):
      entries.append(text_word + " " + " ".join(tran_word))
  return entries


def split_dictionary_from_data(train_file, valid_file=None, test_file=None):
  """Build train, validation and test dictionaries from sentence files.

  The training words are de-duplicated via collect_pronunciations. When
  valid_file / test_file is given, that split is read from its own files
  as is. Otherwise it is carved out of the training words by
  word index % 20: slot 0 for validation, slots 1-2 for test. Words not
  assigned to another split go to training.

  Args:
    train_file: path prefix of the training data; "_text.txt" and
      "_trans.txt" are appended to it.
    valid_file: path prefix of the validation data, or a falsy value.
    test_file: path prefix of the test data, or a falsy value.

  Returns:
    Tuple (train_dic, valid_dic, test_dic) of lists of entries of the
    form "<word> <c1> <c2> ...".
  """
  source_dic = _read_word_pairs(train_file)
  valid_dic = _read_word_pairs(valid_file) if valid_file else []
  test_dic = _read_word_pairs(test_file) if test_file else []
  train_dic = []

  dic = collect_pronunciations(source_dic)

  # Split dictionary to train, validation and test (if not assigned).
  # All pronunciations of a word land in the same split.
  for i, word in enumerate(dic):
    slot = i % 20
    for pronunciations in dic[word]:
      entry = word + ' ' + pronunciations
      if slot == 0 and not valid_file:
        valid_dic.append(entry)
      elif slot in (1, 2) and not test_file:
        test_dic.append(entry)
      else:
        train_dic.append(entry)
  return train_dic, valid_dic, test_dic
189 | 
def prepare_g2p_from_naive_data(model_dir, train_path, valid_path, test_path,
                                save_symbol_vocab=False,
                                save_word_vocab=False):
  """Split the raw data and write the paired files into model_dir.

  For each of the train, valid and test splits two files are written:
  "<split>_vocab.paired" (grapheme side first) and
  "<split>_vocab.paired_inverse" (phoneme side first). model_dir is
  created if it does not exist.

  Args:
    model_dir: directory in which the data sets will be stored;
    train_path: path prefix of the training data;
    valid_path: path prefix of the validation data (may be empty);
    test_path: path prefix of the test data (may be empty);
    save_symbol_vocab: if true, also write "vocab.phoneme" and
      "vocab.grapheme" built from the training split. Off by default,
      matching the previously hard-disabled code path;
    save_word_vocab: if true, also write "vocab.word_text" and
      "vocab.word_tran" built from the training split. Off by default,
      matching the previously hard-disabled code path.
  """
  # Create train, validation and test sets.
  train_dic, valid_dic, test_dic = split_dictionary_from_data(
      train_path, valid_path, test_path)
  # Split dictionaries into two separate lists with graphemes and phonemes.
  train_gr, train_ph = split_to_grapheme_phoneme(train_dic)
  valid_gr, valid_ph = split_to_grapheme_phoneme(valid_dic)
  test_gr, test_ph = split_to_grapheme_phoneme(test_dic)

  if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

  splits = (("train", train_gr, train_ph),
            ("valid", valid_gr, valid_ph),
            ("test", test_gr, test_ph))
  for name, graphemes, phonemes in splits:
    save_paired(graphemes, phonemes,
                os.path.join(model_dir, name + "_vocab.paired"))
    save_paired(phonemes, graphemes,
                os.path.join(model_dir, name + "_vocab.paired_inverse"))

  if save_symbol_vocab:
    ph_vocab = create_vocabulary(train_ph)
    gr_vocab = create_vocabulary(train_gr)
    save_vocabulary(ph_vocab, os.path.join(model_dir, "vocab.phoneme"))
    save_vocabulary(gr_vocab, os.path.join(model_dir, "vocab.grapheme"))

  if save_word_vocab:
    word_text = create_word(train_gr)
    word_tras = create_word(train_ph)
    save_vocabulary(word_text, os.path.join(model_dir, "vocab.word_text"))
    save_vocabulary(word_tras, os.path.join(model_dir, "vocab.word_tran"))
233 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import argparse
  4 | import os
  5 | import time
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | 
 10 | import torch.optim as optim
 11 | import torchtext.data as data
 12 | 
 13 | from model import G2P
 14 | from utils import phoneme_error_rate,adjust_learning_rate,Beam
 15 | #from CMUDict import CMUDict as Dict
 16 | from KORDict import KORDict as Dict
 17 | 
 18 | parser = {
 19 |     'data_path': './prepared_data/',
 20 |     'train_data': 'train_vocab.paired_inverse',
 21 |     'valid_data': 'valid_vocab.paired_inverse',
 22 |     'test_data': 'test_vocab.paired_inverse',
 23 |     'epochs': 10,
 24 |     'batch_size': 100,
 25 |     'max_len': 40,  # max length of grapheme/phoneme sequences
 26 |     'beam_size': 3,  # size of beam for beam-search
 27 |     'd_embed': 70,  # embedding dimension
 28 |     'd_hidden': 500,  # hidden dimension
 29 |     'attention': True,  # use attention or not
 30 |     'log_every': 100,  # number of iterations to log and validate training
 31 |     'lr': 0.007,  # initial learning rate
 32 |     'lr_decay': 0.5,  # decay lr when not observing improvement in val_loss
 33 |     'lr_min': 1e-5,  # stop when lr is too low
 34 |     'n_bad_loss': 5,  # number of bad val_loss before decaying
 35 |     'clip': 2.3,  # clip gradient, to avoid exploding gradient
 36 |     'cuda': True,  # using gpu or not
 37 |     'seed': 5,  # initial seed
 38 |     'intermediate_path': './KOR_data_output/',  # path to save models
 39 | }
 40 | 
 41 | #setattr(ex, name, field.preprocess(val))
 42 | iteration = n_total = train_loss = n_bad_loss = 0
 43 | best_val_loss = 10
 44 | init = time.time()
 45 | 
 46 | def train(config, train_iter,val_iter, model, criterion, optimizer, epoch):
 47 |     global iteration, n_total, train_loss, n_bad_loss
 48 |     global init, best_val_loss
 49 | 
 50 |     print("=> EPOCH {}".format(epoch))
 51 |     train_iter.init_epoch()
 52 |     for batch in train_iter:
 53 |         iteration += 1
 54 |         model.train()
 55 | 
 56 |         output, _, __ = model(batch.grapheme, batch.phoneme[:-1].detach())
 57 |         target = batch.phoneme[1:]
 58 |         loss = criterion(output.view(output.size(0) * output.size(1), -1),
 59 |                          target.view(target.size(0) * target.size(1)))
 60 | 
 61 |         optimizer.zero_grad()
 62 |         loss.backward()
 63 |         torch.nn.utils.clip_grad_norm(model.parameters(), config.clip, 'inf')
 64 |         optimizer.step()
 65 | 
 66 |         n_total += batch.batch_size
 67 |         train_loss += loss.data[0] * batch.batch_size
 68 | 
 69 |         if iteration % config.log_every == 0:
 70 |             train_loss /= n_total
 71 |             val_loss = validate(val_iter, model, criterion)
 72 |             print("   % Time: {:5.0f} | Iteration: {:5} | Batch: {:4}/{}"
 73 |                   " | Train loss: {:.4f} | Val loss: {:.4f}"
 74 |                   .format(time.time() - init, iteration, train_iter.iterations,
 75 |                           len(train_iter), train_loss, val_loss))
 76 | 
 77 |             # test for val_loss improvement
 78 |             n_total = train_loss = 0
 79 |             if val_loss < best_val_loss:
 80 |                 best_val_loss = val_loss
 81 |                 n_bad_loss = 0
 82 |                 torch.save(model.state_dict(), config.best_model)
 83 |             else:
 84 |                 n_bad_loss += 1
 85 |             if n_bad_loss == config.n_bad_loss:
 86 |                 best_val_loss = val_loss
 87 |                 n_bad_loss = 0
 88 |                 adjust_learning_rate(optimizer, config.lr_decay)
 89 |                 new_lr = optimizer.param_groups[0]['lr']
 90 |                 print("=> Adjust learning rate to: {}".format(new_lr))
 91 |                 if new_lr < config.lr_min:
 92 |                     return True
 93 |     return False
 94 | 
 95 | 
 96 | def validate(val_iter, model, criterion):
 97 |     model.eval()
 98 |     val_loss = 0
 99 |     val_iter.init_epoch()
100 |     for batch in val_iter:
101 |         output, _, __ = model(batch.grapheme, batch.phoneme[:-1])
102 |         target = batch.phoneme[1:]
103 |         loss = criterion(output.squeeze(1), target.squeeze(1))
104 |         val_loss += loss.data[0] * batch.batch_size
105 |     return val_loss / len(val_iter.dataset)
106 | 
107 | 
def test(test_iter, model, criterion):
    """Decode every example in `test_iter` and print PER and WER.

    The model is called in generation mode (grapheme input only) and its
    output token ids are compared with the reference phoneme ids, minus the
    leading init token.  Per-example scores are summed and divided by the
    dataset size, so the iterator is expected to yield one example per
    batch.  `criterion` is accepted for signature symmetry but not used.
    """
    model.eval()
    test_iter.init_epoch()
    per_total = 0
    wer_total = 0
    for batch in test_iter:
        hypothesis = model(batch.grapheme).data.tolist()
        reference = batch.phoneme[1:].squeeze(1).data.tolist()
        # batch_size = 1: one PER value and one 0/1 word error per batch
        per_total += phoneme_error_rate(hypothesis, reference)
        wer_total += int(hypothesis != reference)

    n_examples = len(test_iter.dataset)
    per_percent = per_total / n_examples * 100
    wer_percent = wer_total / n_examples * 100
    print("Phoneme error rate (PER): {:.2f}\nWord error rate (WER): {:.2f}"
          .format(per_percent, wer_percent))
126 | 
def show(batch, model):
    """Print one example: input word (>), reference (=) and prediction (<).

    Only single-example batches are supported.  Token ids are mapped back
    to symbols through each field's vocabulary and the resulting jamo
    sequences are composed with `hangul_utils.join_jamos`.
    """
    assert batch.batch_size == 1
    fields = batch.dataset.fields
    g_field, p_field = fields['grapheme'], fields['phoneme']

    # Drop the final predicted token (presumably the end marker -- confirm).
    predicted_ids = model(batch.grapheme).data.tolist()[:-1]
    # Skip the init token, then undo the reversal applied by the grapheme
    # tokenizer so the word reads in its natural order.
    grapheme_ids = batch.grapheme.squeeze(1).data.tolist()[1:][::-1]
    # Strip the first and last ids (init and end tokens of the phoneme field).
    phoneme_ids = batch.phoneme.squeeze(1).data.tolist()[1:-1]

    # Imported lazily so the dependency is only needed when displaying.
    from hangul_utils import join_jamos

    def render(field, ids):
        return join_jamos([field.vocab.itos[i] for i in ids])

    rendered = (render(g_field, grapheme_ids),
                render(p_field, phoneme_ids),
                render(p_field, predicted_ids))
    print("> {}\n= {}\n< {}\n".format(*rendered))
143 | 
def main():
    """Build the data pipeline and model, train, then evaluate and display.

    Steps: read the module-level `parser` settings, load the train/valid/test
    splits, build both vocabularies, train for up to `epochs` epochs (stopping
    early when `train` reports the learning rate is too low), reload the best
    saved weights, print PER/WER and finally print every test example.

    Raises:
        FileNotFoundError: if the configured data directory does not exist.
    """
    args = argparse.Namespace(**parser)

    # Only use the GPU when it was requested and is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()

    if not os.path.isdir(args.intermediate_path):
        os.makedirs(args.intermediate_path)

    if not os.path.isdir(args.data_path):
        # The previous `raise print("error")` evaluated to `raise None`,
        # which is itself a TypeError; raise a meaningful exception instead.
        raise FileNotFoundError(
            "data directory not found: {!r}".format(args.data_path))

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Graphemes: keep the text before the first '(' and feed the characters
    # in reverse order.
    g_field = data.Field(init_token='<s>',
                         tokenize=(lambda x: list(x.split('(')[0])[::-1]))
    # Phonemes: one token per character.
    p_field = data.Field(init_token='<os>', eos_token='</os>',
                         tokenize=(lambda x: list(x)))

    train_data, val_data, test_data = Dict.splits(
        args.data_path, args.train_data, args.valid_data, args.test_data,
        g_field, p_field, args.seed)

    g_field.build_vocab(train_data, val_data, test_data)
    p_field.build_vocab(train_data, val_data, test_data)

    device = None if args.cuda else -1  # None is current gpu
    train_iter = data.BucketIterator(train_data, batch_size=args.batch_size,
                                     repeat=False, device=device)
    # Validation and test run one example at a time.
    val_iter = data.Iterator(val_data, batch_size=1,
                             train=False, sort=False, device=device)
    test_iter = data.Iterator(test_data, batch_size=1,
                              train=False, shuffle=True, device=device)

    config = args
    config.g_size = len(g_field.vocab)
    config.p_size = len(p_field.vocab)
    config.best_model = os.path.join(config.intermediate_path,
                                     "best_model_adagrad_attn.pth")

    model = G2P(config)
    criterion = nn.NLLLoss()
    if config.cuda:
        model.cuda()
        criterion.cuda()
    optimizer = optim.Adagrad(model.parameters(), lr=config.lr)  # use Adagrad

    # Set to False to skip training and only evaluate the saved best model.
    do_train = True
    if do_train:
        for epoch in range(1, config.epochs + 1):
            stop = train(config, train_iter, val_iter, model, criterion,
                         optimizer, epoch)
            if stop:
                break

    model.load_state_dict(torch.load(config.best_model))
    test(test_iter, model, criterion)

    test_iter.init_epoch()
    for i, batch in enumerate(test_iter):
        show(batch, model)


if __name__ == '__main__':
    main()
222 | 
223 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | 
  7 | from utils import Beam
  8 | 
  9 | 
 10 | class Encoder(nn.Module):
 11 | 
 12 |     def __init__(self, vocab_size, d_embed, d_hidden):
 13 |         super(Encoder, self).__init__()
 14 |         self.embedding = nn.Embedding(vocab_size, d_embed)
 15 |         self.lstm = nn.LSTMCell(d_embed, d_hidden)
 16 |         self.d_hidden = d_hidden
 17 | 
 18 |     def forward(self, x_seq, cuda=False):
 19 |         o = []
 20 |         e_seq = self.embedding(x_seq)  # seq x batch x dim
 21 |         tt = torch.cuda if cuda else torch  # use cuda tensor or not
 22 |         # create initial hidden state and initial cell state
 23 |         h = Variable(tt.FloatTensor(e_seq.size(1), self.d_hidden).zero_())
 24 |         c = Variable(tt.FloatTensor(e_seq.size(1), self.d_hidden).zero_())
 25 | 
 26 |         for e in e_seq.chunk(e_seq.size(0), 0):
 27 |             e = e.squeeze(0)
 28 |             h, c = self.lstm(e, (h, c))
 29 |             o.append(h)
 30 |         return torch.stack(o, 0), h, c
 31 | 
 32 | class Attention(nn.Module):
 33 |     """Dot global attention from https://arxiv.org/abs/1508.04025"""
 34 | 
 35 |     def __init__(self, dim):
 36 |         super(Attention, self).__init__()
 37 |         self.linear = nn.Linear(dim * 2, dim, bias=False)
 38 | 
 39 |     def forward(self, x, context=None):
 40 |         if context is None:
 41 |             return x
 42 |         assert x.size(0) == context.size(0)  # x: batch x dim
 43 |         assert x.size(1) == context.size(2)  # context: batch x seq x dim
 44 |         attn = F.softmax(context.bmm(x.unsqueeze(2)).squeeze(2))
 45 |         weighted_context = attn.unsqueeze(1).bmm(context).squeeze(1)
 46 |         o = self.linear(torch.cat((x, weighted_context), 1))
 47 |         return F.tanh(o)
 48 | 
 49 | 
 50 | class Decoder(nn.Module):
 51 |     def __init__(self, vocab_size, d_embed, d_hidden):
 52 |         super(Decoder, self).__init__()
 53 | 
 54 |         if vocab_size < d_embed:
 55 |             self.embedding = nn.Embedding(vocab_size, vocab_size)
 56 |             self.embedding.weight.data = torch.eye(vocab_size)
 57 |             self.lstm = nn.LSTMCell(vocab_size, d_hidden)
 58 |         else:
 59 |             self.embedding = nn.Embedding(vocab_size, d_embed)
 60 |             self.lstm = nn.LSTMCell(d_embed, d_hidden)
 61 |         self.attn = Attention(d_hidden)
 62 |         self.linear = nn.Linear(d_hidden, vocab_size)
 63 | 
 64 |     def forward(self, x_seq, h, c, context=None):
 65 |         o = []
 66 |         e_seq = self.embedding(x_seq)
 67 |         for e in e_seq.chunk(e_seq.size(0), 0):
 68 |             e = e.squeeze(0)
 69 |             h, c = self.lstm(e, (h, c))
 70 |             o.append(self.attn(h, context))
 71 |         o = torch.stack(o, 0)
 72 |         o = self.linear(o.view(-1, h.size(1)))
 73 |         return F.log_softmax(o).view(x_seq.size(0), -1, o.size(1)), h, c
 74 | 
 75 | 
 76 | class G2P(nn.Module):
 77 |     def __init__(self, config):
 78 |         super(G2P, self).__init__()
 79 |         self.encoder = Encoder(config.g_size, config.d_embed,
 80 |                                config.d_hidden)
 81 |         self.decoder = Decoder(config.p_size, config.d_embed,
 82 |                                config.d_hidden)
 83 |         self.config = config
 84 | 
 85 |     def forward(self, g_seq, p_seq=None):
 86 |         o, h, c = self.encoder(g_seq, self.config.cuda)
 87 |         #context = o.t() if self.config.attention else None
 88 |         context = o.transpose(0,1) if self.config.attention else None
 89 |         if p_seq is not None:  # not generate
 90 |             return self.decoder(p_seq, h, c, context)
 91 |         else:
 92 |             assert g_seq.size(1) == 1  # make sure batch_size = 1
 93 |             return self._generate(h, c, context)
 94 | 
 95 |     def _generate(self, h, c, context):
 96 |         beam = Beam(self.config.beam_size, cuda=self.config.cuda)
 97 |         # Make a beam_size batch.
 98 |         h = h.expand(beam.size, h.size(1))
 99 |         c = c.expand(beam.size, c.size(1))
100 |         context = context.expand(beam.size, context.size(1), context.size(2))
101 | 
102 |         for i in range(self.config.max_len):  # max_len = 20
103 |             x = beam.get_current_state()
104 |             o, h, c = self.decoder(Variable(x.unsqueeze(0)), h, c, context)
105 |             if beam.advance(o.data.squeeze(0)):
106 |                 break
107 |             h.data.copy_(h.data.index_select(0, beam.get_current_origin()))
108 |             c.data.copy_(c.data.index_select(0, beam.get_current_origin()))
109 |         tt = torch.cuda if self.config.cuda else torch
110 |         return Variable(tt.LongTensor(beam.get_hyp(0)))


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import Levenshtein  # https://github.com/ztane/python-Levenshtein/
 4 | import torch
 5 | 
 6 | # Based on https://github.com/MaximumEntropy/Seq2Seq-PyTorch/
 7 | class Beam(object):
 8 |     """Ordered beam of candidate outputs."""
 9 | 
10 |     def __init__(self, size, pad=1, bos=2, eos=3, cuda=False):
11 |         """Initialize params."""
12 |         self.size = size
13 |         self.done = False
14 |         self.pad = pad
15 |         self.bos = bos
16 |         self.eos = eos
17 |         self.tt = torch.cuda if cuda else torch
18 | 
19 |         # The score for each translation on the beam.
20 |         self.scores = self.tt.FloatTensor(size).zero_()
21 | 
22 |         # The backpointers at each time-step.
23 |         self.prevKs = []
24 | 
25 |         # The outputs at each time-step.
26 |         self.nextYs = [self.tt.LongTensor(size).fill_(self.pad)]
27 |         self.nextYs[0][0] = self.bos
28 | 
29 |     # Get the outputs for the current timestep.
30 |     def get_current_state(self):
31 |         """Get state of beam."""
32 |         return self.nextYs[-1]
33 | 
34 |     # Get the backpointers for the current timestep.
35 |     def get_current_origin(self):
36 |         """Get the backpointer to the beam at this step."""
37 |         return self.prevKs[-1]
38 | 
39 |     def advance(self, workd_lk):
40 |         """Advance the beam."""
41 |         num_words = workd_lk.size(1)
42 | 
43 |         # Sum the previous scores.
44 |         if len(self.prevKs) > 0:
45 |             beam_lk = workd_lk + self.scores.unsqueeze(1).expand_as(workd_lk)
46 |         else:
47 |             beam_lk = workd_lk[0]
48 | 
49 |         flat_beam_lk = beam_lk.view(-1)
50 | 
51 |         bestScores, bestScoresId = flat_beam_lk.topk(self.size, 0,
52 |                                                      True, True)
53 |         self.scores = bestScores
54 | 
55 |         # bestScoresId is flattened beam x word array, so calculate which
56 |         # word and beam each score came from
57 |         prev_k = bestScoresId / num_words
58 |         self.prevKs.append(prev_k)
59 |         self.nextYs.append(bestScoresId - prev_k * num_words)
60 |         # End condition is when top-of-beam is EOS.
61 |         if self.nextYs[-1][0] == self.eos:
62 |             self.done = True
63 |         return self.done
64 | 
65 |     def get_hyp(self, k):
66 |         """Get hypotheses."""
67 |         hyp = []
68 |         # print(len(self.prevKs), len(self.nextYs), len(self.attn))
69 |         for j in range(len(self.prevKs) - 1, -1, -1):
70 |             hyp.append(self.nextYs[j + 1][k])
71 |             #defence for beamsearch error
72 |             if self.nextYs[j + 1][k] == self.eos:
73 |                 hyp=[self.eos]
74 |             k = self.prevKs[j][k]
75 |         return hyp[::-1]
76 | 
77 | 
78 | 
79 | 
80 | 
def phoneme_error_rate(p_seq1, p_seq2):
    """Return the edit distance between two sequences per reference token.

    Args:
        p_seq1: hypothesis sequence (list of hashable tokens).
        p_seq2: reference sequence (list); its length is the denominator.

    Returns:
        Levenshtein distance divided by `len(p_seq2)`.  For an empty
        reference the denominator is taken as 1 instead of raising
        ZeroDivisionError, so the result is the raw distance.
    """
    # Levenshtein.distance is applied to strings, so give every distinct
    # token its own single character first.
    p_vocab = set(p_seq1 + p_seq2)
    p2c = {p: chr(i) for i, p in enumerate(p_vocab)}
    c_seq1 = ''.join(p2c[p] for p in p_seq1)
    c_seq2 = ''.join(p2c[p] for p in p_seq2)
    return Levenshtein.distance(c_seq1, c_seq2) / max(len(c_seq2), 1)
88 | 
def adjust_learning_rate(optimizer, lr_decay):
    """Scale the learning rate of every parameter group by `lr_decay`, in place."""
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * lr_decay


--------------------------------------------------------------------------------