├── README.md
├── _config.yml
├── images
    ├── DurIAN_4S.png
    ├── DurIAN_SC.png
    └── logo.png
├── index.md
└── wavs
    ├── 025_000009.wav
    ├── 025_r_1.wav
    ├── 025_r_2.wav
    ├── 025_r_3.wav
    ├── 025_r_4.wav
    ├── 025_r_5.wav
    ├── daj_000023.wav
    ├── daj_r_1.wav
    ├── daj_r_2.wav
    ├── daj_r_3.wav
    ├── daj_r_4.wav
    ├── daj_r_5.wav
    ├── dx_r_1.wav
    ├── dx_r_2.wav
    ├── dx_r_3.wav
    ├── dx_r_4.wav
    ├── dx_r_5.wav
    ├── gui_10000224.wav
    ├── gui_r_1.wav
    ├── gui_r_2.wav
    ├── gui_r_3.wav
    ├── gui_r_4.wav
    ├── gui_r_5.wav
    ├── inset_dvec
        ├── dvec_101.wav
        ├── dvec_1161.wav
        ├── dvec_1189.wav
        ├── dvec_1512.wav
        ├── dvec_238.wav
        └── dvec_658.wav
    ├── inset_emb
        ├── lut_101.wav
        ├── lut_1161.wav
        ├── lut_1189.wav
        ├── lut_1512.wav
        ├── lut_238.wav
        └── lut_658.wav
    ├── inset_ref
        ├── 101.wav
        ├── 1161.wav
        ├── 1189.wav
        ├── 1512.wav
        ├── 238.wav
        └── 658.wav
    ├── liu_88010505.wav
    ├── liu_r_1.wav
    ├── liu_r_2.wav
    ├── liu_r_3.wav
    ├── liu_r_4.wav
    ├── liu_r_5.wav
    ├── lpl_r_1.wav
    ├── lpl_r_2.wav
    ├── lpl_r_3.wav
    ├── lpl_r_4.wav
    ├── lpl_r_5.wav
    ├── lpl_sample.wav
    ├── outset_dvec
        ├── dvec_10001.wav
        ├── dvec_10003.wav
        ├── dvec_10004.wav
        └── dvec_10005.wav
    ├── song_dx_801000013.wav
    ├── speech_and_singing
        ├── speech_and_sing_10001.wav
        ├── speech_and_sing_10002.wav
        ├── speech_and_sing_10003.wav
        ├── speech_and_sing_10004.wav
        ├── speech_and_sing_10005.wav
        └── speech_and_sing_10006.wav
    ├── speech_only
        ├── speech_10001.wav
        ├── speech_10002.wav
        ├── speech_10003.wav
        ├── speech_10004.wav
        ├── speech_10005.wav
        └── speech_10006.wav
    ├── speech_ref
        ├── 10001.wav
        ├── 10002.wav
        ├── 10003.wav
        ├── 10004.wav
        ├── 10005.wav
        └── 10006.wav
    ├── ssx_08010642.wav
    ├── ssx_r_1.wav
    ├── ssx_r_2.wav
    ├── ssx_r_3.wav
    ├── ssx_r_4.wav
    └── ssx_r_5.wav


/README.md:
--------------------------------------------------------------------------------
1 | # DurIAN-SC: Duration Informed Attention Network based Singing Voice Conversion System
2 | 
3 | Project page for our paper "DurIAN-SC: Duration Informed Attention Network based Singing Voice Conversion System". Link: https://tencent-ailab.github.io/learning_singing_from_speech/
4 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
2 | github:
3 |   is_project_page: false
4 | title: [DurIAN_SC]
5 | 


--------------------------------------------------------------------------------
/images/DurIAN_4S.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/images/DurIAN_4S.png


--------------------------------------------------------------------------------
/images/DurIAN_SC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/images/DurIAN_SC.png


--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/images/logo.png


--------------------------------------------------------------------------------
/index.md:
--------------------------------------------------------------------------------
  1 | # <center>DurIAN-SC: Duration Informed Attention Network based Singing Voice Conversion System</center>
  2 | 
  3 | <center>Liqiang Zhang, Chengzhu Yu, Heng Lu, Chao Weng, Chunlei Zhang, Yusong Wu, Xiang Xie, Zijin Li, Dong Yu</center>
  4 | <center>Tencent AI Lab</center>
  5 | 
  6 | 
  7 | ## Abstract
  8 | 
  9 | <div style="text-align: justify"> Singing voice conversion is converting the timbre in the source singing to the target speaker's voice while keeping singing content the same. However, singing data for target speaker is much more difficult to collect compared with normal speech data. In this paper, we introduce a singing voice conversion algorithm that is capable of generating high quality target speaker's singing using only his/her normal speech data. First, we manage to integrate the training and conversion process of speech and singing into one framework by unifying the features used in standard speech synthesis system and singing synthesis system. In this way, normal speech data can also contribute to singing voice conversion training, making the singing voice conversion system more robust especially when the singing database is small. Moreover, in order to achieve one-shot singing voice conversion, a speaker embedding module is developed using both speech and singing data, which provides target speaker identity information during conversion. Experiments indicate the proposed singing voice conversion system can convert source singing to target speaker's high-quality singing with only 20 seconds of target speaker's enrollment speech data.</div> 
 10 | 
 11 | <br>
 12 | 
 13 | ![arch](images/DurIAN_SC.png)
 14 | 
 15 | <br>
 16 | 
 17 | ## Look Up Table(LUT) based speaker embedding & D-vector based speaker embedding 
 18 | 
 19 | <br>
 20 | \* <sup>Note: All samples are in Mandarin Chinese.</sup>
 21 | <br>
 22 | \* <sup>There are 6 in-set singers---3 male singers and 3 female singers, shown here.</sup>
 23 | <br>
 24 | \* <sup>The "Reference Voice" is shown here for timbre similarity test, which is the target singer's singing. </sup>
 25 | 
 26 | 
 27 | <br>
 28 | 
 29 | 
 30 | <table align="center">
 31 |   <thead>
 32 |     <tr>
 33 |       <th> </th>
 34 |       <th>Reference Voice</th>
 35 |       <th>LUT Sample</th>
 36 |       <th>D-vector Sample</th>
 37 |     </tr>
 38 |   </thead>
 39 |   <tbody>
 40 |     <tr>
 41 |       <th>Female Singer1</th>
 42 |       <td><audio controls="" preload="auto">
 43 |             <source src="wavs/inset_ref/101.wav"></audio></td>
 44 |       <td><audio controls="" preload="auto">
 45 |             <source src="wavs/inset_emb/lut_101.wav"></audio></td>
 46 |       <td><audio controls="" preload="auto">
 47 |             <source src="wavs/inset_dvec/dvec_101.wav"></audio></td>
 48 |     </tr>
 49 |     <tr>
 50 |       <th>Female Singer2</th>
 51 |       <td><audio controls="" preload="auto">
 52 |             <source src="wavs/inset_ref/238.wav"></audio></td>
 53 |       <td><audio controls="" preload="auto">
 54 |             <source src="wavs/inset_emb/lut_238.wav"></audio></td>
 55 |       <td><audio controls="" preload="auto">
 56 |             <source src="wavs/inset_dvec/dvec_238.wav"></audio></td>
 57 |     </tr>
 58 |     <tr>
 59 |       <th>Female Singer3</th>
 60 |       <td><audio controls="" preload="auto">
 61 |             <source src="wavs/inset_ref/1161.wav"></audio></td>
 62 |       <td><audio controls="" preload="auto">
 63 |             <source src="wavs/inset_emb/lut_1161.wav"></audio></td>
 64 |       <td><audio controls="" preload="auto">
 65 |             <source src="wavs/inset_dvec/dvec_1161.wav"></audio></td>
 66 |     </tr>
 67 |     <tr>
 68 |       <th>Male Singer1</th>
 69 |       <td><audio controls="" preload="auto">
 70 |             <source src="wavs/inset_ref/658.wav"></audio></td>
 71 |       <td><audio controls="" preload="auto">
 72 |             <source src="wavs/inset_emb/lut_658.wav"></audio></td>
 73 |       <td><audio controls="" preload="auto">
 74 |             <source src="wavs/inset_dvec/dvec_658.wav"></audio></td>
 75 |     </tr>
 76 |     <tr>
 77 |       <th>Male Singer2</th>
 78 |       <td><audio controls="" preload="auto">
 79 |             <source src="wavs/inset_ref/1189.wav"></audio></td>
 80 |       <td><audio controls="" preload="auto">
 81 |             <source src="wavs/inset_emb/lut_1189.wav"></audio></td>
 82 |       <td><audio controls="" preload="auto">
 83 |             <source src="wavs/inset_dvec/dvec_1189.wav"></audio></td>
 84 |     </tr>
 85 |     <tr>
 86 |       <th>Male Singer3</th>
 87 |       <td><audio controls="" preload="auto">
 88 |             <source src="wavs/inset_ref/1512.wav"></audio></td>
 89 |       <td><audio controls="" preload="auto">
 90 |             <source src="wavs/inset_emb/lut_1512.wav"></audio></td>
 91 |       <td><audio controls="" preload="auto">
 92 |             <source src="wavs/inset_dvec/dvec_1512.wav"></audio></td>
 93 |     </tr>
 94 |   </tbody>
 95 | </table>
 96 | 
 97 | <br>
 98 | 
 99 | ## Out-of-set test of D-vector based speaker embedding 
100 | 
101 | <br>
102 | \* <sup>Note: All samples are in Mandarin Chinese.</sup>
103 | <br>
104 | \* <sup>There are 4 out-of-set speakers---2 male speakers and 2 female speakers, shown here.</sup>
105 | <br>
106 | \* <sup>The "Register Voice" is shown here for similarity test, which is the target speaker's speech.</sup>
107 | 
108 | 
109 | <br>
110 | 
111 | <table align="center">
112 |   <thead>
113 |     <tr>
114 |       <th> </th>
115 |       <th>Register Voice</th>
116 |       <th>D-vector Sample</th>
117 |     </tr>
118 |   </thead>
119 |   <tbody>
120 |     <tr>
121 |       <th>Female Speaker1</th>
122 |       <td><audio controls="" preload="auto">
123 |             <source src="wavs/speech_ref/10004.wav"></audio></td>
124 |       <td><audio controls="" preload="auto">
125 |             <source src="wavs/outset_dvec/dvec_10004.wav"></audio></td>
126 |     </tr>
127 |     <tr>
128 |       <th>Female Speaker2</th>
129 |       <td><audio controls="" preload="auto">
130 |             <source src="wavs/speech_ref/10005.wav"></audio></td>
131 |       <td><audio controls="" preload="auto">
132 |             <source src="wavs/outset_dvec/dvec_10005.wav"></audio></td>
133 |     </tr>
134 |     <tr>
135 |       <th>Male Speaker1</th>
136 |       <td><audio controls="" preload="auto">
137 |             <source src="wavs/speech_ref/10003.wav"></audio></td>
138 |       <td><audio controls="" preload="auto">
139 |             <source src="wavs/outset_dvec/dvec_10003.wav"></audio></td>
140 |     </tr>
141 |     <tr>
142 |       <th>Male Speaker2</th>
143 |       <td><audio controls="" preload="auto">
144 |             <source src="wavs/speech_ref/10001.wav"></audio></td>
145 |       <td><audio controls="" preload="auto">
146 |             <source src="wavs/outset_dvec/dvec_10001.wav"></audio></td>
147 |     </tr>
148 |   </tbody>
149 | </table>
150 | 
151 | <br>
152 | 
153 | 
154 | ## Training with speech corpus
155 | 
156 | <br>
157 | \* <sup>Note: All samples are in Mandarin Chinese.</sup>
158 | <br>
159 | \* <sup>There are 6 in-set speakers---3 male speakers and 3 female speakers, shown here.</sup>
160 | <br>
161 | \* <sup>The "Reference Voice" is shown here for timbre similarity test, which is the target speaker's speech. </sup>
162 | <br>
163 | \* <sup>"Speech only" means training only with speech data, while "Speech & Singing" means training with speech data and other singers' singing data. </sup>
164 | 
165 | <br>
166 | 
167 | 
168 | <table align="center">
169 |   <thead>
170 |     <tr>
171 |       <th> </th>
172 |       <th>Reference Voice</th>
173 |       <th>Speech Only</th>
174 |       <th>Speech & Singing</th>
175 |     </tr>
176 |   </thead>
177 |   <tbody>
178 |     <tr>
179 |       <th>Male Speaker1</th>
180 |       <td><audio controls="" preload="auto">
181 |             <source src="wavs/speech_ref/10001.wav"></audio></td>
182 |       <td><audio controls="" preload="auto">
183 |             <source src="wavs/speech_only/speech_10001.wav"></audio></td>
184 |       <td><audio controls="" preload="auto">
185 |             <source src="wavs/speech_and_singing/speech_and_sing_10001.wav"></audio></td>
186 |     </tr>
187 |     <tr>
188 |       <th>Male Speaker2</th>
189 |       <td><audio controls="" preload="auto">
190 |             <source src="wavs/speech_ref/10002.wav"></audio></td>
191 |       <td><audio controls="" preload="auto">
192 |             <source src="wavs/speech_only/speech_10002.wav"></audio></td>
193 |       <td><audio controls="" preload="auto">
194 |             <source src="wavs/speech_and_singing/speech_and_sing_10002.wav"></audio></td>
195 |     </tr>
196 |     <tr>
197 |       <th>Male Speaker3</th>
198 |       <td><audio controls="" preload="auto">
199 |             <source src="wavs/speech_ref/10003.wav"></audio></td>
200 |       <td><audio controls="" preload="auto">
201 |             <source src="wavs/speech_only/speech_10003.wav"></audio></td>
202 |       <td><audio controls="" preload="auto">
203 |             <source src="wavs/speech_and_singing/speech_and_sing_10003.wav"></audio></td>
204 |     </tr>
205 |     <tr>
206 |       <th>Female Speaker1</th>
207 |       <td><audio controls="" preload="auto">
208 |             <source src="wavs/speech_ref/10004.wav"></audio></td>
209 |       <td><audio controls="" preload="auto">
210 |             <source src="wavs/speech_only/speech_10004.wav"></audio></td>
211 |       <td><audio controls="" preload="auto">
212 |             <source src="wavs/speech_and_singing/speech_and_sing_10004.wav"></audio></td>
213 |     </tr>
214 |     <tr>
215 |       <th>Female Speaker2</th>
216 |       <td><audio controls="" preload="auto">
217 |             <source src="wavs/speech_ref/10005.wav"></audio></td>
218 |       <td><audio controls="" preload="auto">
219 |             <source src="wavs/speech_only/speech_10005.wav"></audio></td>
220 |       <td><audio controls="" preload="auto">
221 |             <source src="wavs/speech_and_singing/speech_and_sing_10005.wav"></audio></td>
222 |     </tr>
223 |     <tr>
224 |       <th>Female Speaker3</th>
225 |       <td><audio controls="" preload="auto">
226 |             <source src="wavs/speech_ref/10006.wav"></audio></td>
227 |       <td><audio controls="" preload="auto">
228 |             <source src="wavs/speech_only/speech_10006.wav"></audio></td>
229 |       <td><audio controls="" preload="auto">
230 |             <source src="wavs/speech_and_singing/speech_and_sing_10006.wav"></audio></td>
231 |     </tr>
232 |   </tbody>
233 | </table>
234 | 
235 | <br>


--------------------------------------------------------------------------------
/wavs/025_000009.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_000009.wav


--------------------------------------------------------------------------------
/wavs/025_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_r_1.wav


--------------------------------------------------------------------------------
/wavs/025_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_r_2.wav


--------------------------------------------------------------------------------
/wavs/025_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_r_3.wav


--------------------------------------------------------------------------------
/wavs/025_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_r_4.wav


--------------------------------------------------------------------------------
/wavs/025_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/025_r_5.wav


--------------------------------------------------------------------------------
/wavs/daj_000023.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_000023.wav


--------------------------------------------------------------------------------
/wavs/daj_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_r_1.wav


--------------------------------------------------------------------------------
/wavs/daj_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_r_2.wav


--------------------------------------------------------------------------------
/wavs/daj_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_r_3.wav


--------------------------------------------------------------------------------
/wavs/daj_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_r_4.wav


--------------------------------------------------------------------------------
/wavs/daj_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/daj_r_5.wav


--------------------------------------------------------------------------------
/wavs/dx_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/dx_r_1.wav


--------------------------------------------------------------------------------
/wavs/dx_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/dx_r_2.wav


--------------------------------------------------------------------------------
/wavs/dx_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/dx_r_3.wav


--------------------------------------------------------------------------------
/wavs/dx_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/dx_r_4.wav


--------------------------------------------------------------------------------
/wavs/dx_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/dx_r_5.wav


--------------------------------------------------------------------------------
/wavs/gui_10000224.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_10000224.wav


--------------------------------------------------------------------------------
/wavs/gui_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_r_1.wav


--------------------------------------------------------------------------------
/wavs/gui_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_r_2.wav


--------------------------------------------------------------------------------
/wavs/gui_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_r_3.wav


--------------------------------------------------------------------------------
/wavs/gui_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_r_4.wav


--------------------------------------------------------------------------------
/wavs/gui_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/gui_r_5.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_101.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_101.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_1161.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_1161.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_1189.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_1189.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_1512.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_1512.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_238.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_238.wav


--------------------------------------------------------------------------------
/wavs/inset_dvec/dvec_658.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_dvec/dvec_658.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_101.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_101.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_1161.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_1161.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_1189.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_1189.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_1512.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_1512.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_238.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_238.wav


--------------------------------------------------------------------------------
/wavs/inset_emb/lut_658.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_emb/lut_658.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/101.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/101.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/1161.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/1161.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/1189.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/1189.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/1512.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/1512.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/238.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/238.wav


--------------------------------------------------------------------------------
/wavs/inset_ref/658.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/inset_ref/658.wav


--------------------------------------------------------------------------------
/wavs/liu_88010505.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_88010505.wav


--------------------------------------------------------------------------------
/wavs/liu_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_r_1.wav


--------------------------------------------------------------------------------
/wavs/liu_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_r_2.wav


--------------------------------------------------------------------------------
/wavs/liu_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_r_3.wav


--------------------------------------------------------------------------------
/wavs/liu_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_r_4.wav


--------------------------------------------------------------------------------
/wavs/liu_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/liu_r_5.wav


--------------------------------------------------------------------------------
/wavs/lpl_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_r_1.wav


--------------------------------------------------------------------------------
/wavs/lpl_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_r_2.wav


--------------------------------------------------------------------------------
/wavs/lpl_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_r_3.wav


--------------------------------------------------------------------------------
/wavs/lpl_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_r_4.wav


--------------------------------------------------------------------------------
/wavs/lpl_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_r_5.wav


--------------------------------------------------------------------------------
/wavs/lpl_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/lpl_sample.wav


--------------------------------------------------------------------------------
/wavs/outset_dvec/dvec_10001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/outset_dvec/dvec_10001.wav


--------------------------------------------------------------------------------
/wavs/outset_dvec/dvec_10003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/outset_dvec/dvec_10003.wav


--------------------------------------------------------------------------------
/wavs/outset_dvec/dvec_10004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/outset_dvec/dvec_10004.wav


--------------------------------------------------------------------------------
/wavs/outset_dvec/dvec_10005.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/outset_dvec/dvec_10005.wav


--------------------------------------------------------------------------------
/wavs/song_dx_801000013.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/song_dx_801000013.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10001.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10002.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10003.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10004.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10005.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10005.wav


--------------------------------------------------------------------------------
/wavs/speech_and_singing/speech_and_sing_10006.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_and_singing/speech_and_sing_10006.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10001.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10002.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10003.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10004.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10005.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10005.wav


--------------------------------------------------------------------------------
/wavs/speech_only/speech_10006.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_only/speech_10006.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10001.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10002.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10003.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10004.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10004.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10005.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10005.wav


--------------------------------------------------------------------------------
/wavs/speech_ref/10006.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/speech_ref/10006.wav


--------------------------------------------------------------------------------
/wavs/ssx_08010642.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_08010642.wav


--------------------------------------------------------------------------------
/wavs/ssx_r_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_r_1.wav


--------------------------------------------------------------------------------
/wavs/ssx_r_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_r_2.wav


--------------------------------------------------------------------------------
/wavs/ssx_r_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_r_3.wav


--------------------------------------------------------------------------------
/wavs/ssx_r_4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_r_4.wav


--------------------------------------------------------------------------------
/wavs/ssx_r_5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tencent-ailab/learning_singing_from_speech/f17488de7849c50b779839b5e6fc05674a3eabd6/wavs/ssx_r_5.wav


--------------------------------------------------------------------------------