├── .gitignore ├── LICENSE ├── README.md ├── doc ├── README.md ├── gettingStartedV40_006b.pdf ├── gettingStartedV40_006b.zip ├── morphingWithSTRAIGHT.tar.gz ├── morphingWithSTRAIGHTe.pdf └── straightTechRep.pdf ├── morphing_src ├── angryHai.mat ├── createMobject.m ├── directSTRAIGHTmorphing.m ├── displayMobject.m ├── executeSTRAIGHTanalysisM.m ├── executeSTRAIGHTanalysisMExt.m ├── executeSTRAIGHTsynthesisM.m ├── fixDummyObjectSize.m ├── makeLogarithmicLevelDifferenceBasedOnPeaks.m ├── neutralHai.mat ├── setAnchorFromRawAnchor.m ├── timeAlignedDirectSTRAIGHTmorphing.m ├── timeFrequencySTRAIGHTmorphing.m ├── timeFrequencySTRAIGHTmorphingExt.m ├── tmang.wav ├── tmneu.wav ├── updateFieldOfMobject.m └── waveformMorphing.m └── src ├── CheckAnalysisData.m ├── HzToErbRate.m ├── MulticueF0v14.m ├── ReadBinaryData.m ├── SynthesizeLegacy_STRAIGHT_default.m ├── TestAnalysisRegression.m ├── TestAnalysisRegressionR.m ├── TestCopySynthRegression.m ├── TestCopySynthRegressionR.m ├── WriteBinaryData.m ├── aiffread.m ├── aiffwrite.m ├── aperiodiccomp.m ├── aperiodicpartERB2.m ├── boundmes2.m ├── correctdpv.m ├── defaultparamsorg.m ├── exSinStraightSynth.m ├── exSinStraightSynthBU.m ├── exSinStraightSynthBU2.m ├── exstraightAPind.m ├── exstraightsource.m ├── exstraightspec.m ├── exstraightsynth.m ├── f0track5.m ├── fixpF0VexMltpBG4.m ├── fractpitch2.m ├── gdmap.m ├── getvalufromedit.m ├── isOctave.m ├── mktstr.m ├── multanalytFineCSPB.m ├── optimumsmoothing.m ├── plotcpower.m ├── powerchk.m ├── refineF06.m ├── regressionTestBaseGenerator.m ├── regressionTestBaseGeneratorR.m ├── smax.m ├── specreshape.m ├── straight.m ├── straightBodyC03ma.m ├── straightCIv1.m ├── straightPanel98bak.m ├── straightSynthTB06.m ├── straightSynthTB07ca.m ├── straightpanel98.mat ├── straightsound.m ├── syncgui.m ├── testBestMix.m └── vaiueo2d.wav /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .css 3 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2018 Hideki Kawahara All Rights Reserved 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Legacy STRAIGHT 2 | 3 | The legacy-STRAIGHT is a collection of speech analysis, modification and resynthesis tools. 4 | 5 | ## Installation 6 | 7 | Set MATLAB path to "src" directory. 8 | 9 | ## Quick start 10 | 11 | Paste the following code into the MATLAB command window. It generates copy-synthesized output to the variable "synthesized_signal". 12 | 13 | [x, fs] = audioread('vaiueo2d.wav'); 14 | f0raw = MulticueF0v14(x,fs); 15 | ap = exstraightAPind(x,fs,f0raw); 16 | n3sgram=exstraightspec(x,f0raw,fs); 17 | synthesized_signal = exstraightsynth(f0raw,n3sgram,ap,fs); 18 | 19 | For running this using GNU Octave, please load signal package. 20 | 21 | ``` 22 | pkg load signal 23 | ``` 24 | 25 | ## Release note 26 | 27 | * [July 19, 2018; Prerelease] 28 | The "Quick start" example also runs properly on GNU Octave 4.4.0 on macOS High Sierra (10.13.6) 29 | 30 | * [July 17, 2018: Prerelease] 31 | Added documents. 
The first release will be on July 24, 2018. 32 | 33 | * [July 16, 2018: Prerelease] 34 | This release is a copy of the latest version which was distributed by the first author (Hideki Kawahara) to academic communities. The version is named STRAIGHTV40_007. The last update was July 17, 2016. Kansai TLO has also licensed the legacy-STRAIGHT for commercial use. The licensees of the legacy-STRAIGHT agreed to make the legacy-STRAIGHT open to the public after July 15, 2018. 35 | 36 | ## Acknowledgment 37 | 38 | The legacy-STRAIGHT was supported by many coauthors, contributors, and funding agencies. 39 | 40 | *** 41 | 42 | Hideki Kawahara, 43 | July 16, 2018 (start date) -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documents for legacy-STRAIGHT 2 | 3 | This directory consists of documents prepared for the legacy-STRAIGHT. 4 | 5 | Please note that the legacy-STRAIGHT is an ended project. The latest extended morphing framework uses the TANDEM-STRAIGHT. Dr. Masanori Morise, who invented the core component of the TANDEM-STRAIGHT, also distributes an open-source VOCODER framework called WORLD. 
[Link to mmorise/WORLD](https://github.com/mmorise/World) 6 | 7 | ## HTML and PDF documents (in this directory) 8 | 9 | * Getting started with command mode STRAIGHT (May 5, 2007) 10 | * file: gettingStartedV40_006b.zip (archived HTML document) 11 | * file: gettingStartedV40_006b.pdf (PDF version of the HTML document) 12 | * Auditory morphing using STRAIGHT (This framework is outdated.)(November 7, 2005) 13 | * file: morphingWithSTRAIGHT.tar.gz (archived HTML document) 14 | * file: morphingWithSTRAIGHTe.pdf (PDF version of the HTML document) 15 | * STRAIGHT technical report (In Japanese) 16 | * file: straightTechRep.pdf (PDF document) 17 | 18 | ## Publications 19 | 20 | * Hideki Kawahara, Ikuyo Masuda-Katsuse and Alain de Cheveigne: Restructuring speech representations using a pitch-adaptive time-frequency smoothing and an instantaneous-frequency-based F0 extraction: Possible role of a repetitive structure in sounds, Speech Communication, 27, pp.187-207 (1999) [Link](https://doi.org/10.1016/S0167-6393(98)00085-5) 21 | * This is the first journal paper on STRAIGHT. Spectral envelope estimation is still relevant. Descriptions on the source information are outdated. 22 | * Hideki Kawahara: STRAIGHT, Exploration of the other aspect of VOCODER: Perceptually isomorphic decomposition of speech sounds, Acoustical Science and Technology, Vol.27, No.6, (2006)[Link to pdf](http://www.jstage.jst.go.jp/article/ast/27/6/349/_pdf) 23 | * This is a featured paper introducing the underlying concept of STRAIGHT. 24 | * Hideki Kawahara, Alain de Cheveigné, Hideki Banno, Toru Takahashi and Toshio Irino, Nearly Defect-free F0 Trajectory Extraction for Expressive Speech Modifications based on STRAIGHT, Proc. Interspeech2005, Lisboa, pp.537-540, Sept. 2005.[Link to pdf](https://www.isca-speech.org/archive/archive_papers/interspeech_2005/i05_0537.pdf) 25 | * This conference paper introduces the latest F0 extractor for the legacy-STRAIGHT, called NDF. 
The performance of NDF is still competitive in practical situations. 26 | 27 | ## Link 28 | 29 | * [Hideki Kawahara](http://www.wakayama-u.ac.jp/~kawahara/index_e.html) 30 | 31 | *** 32 | Last update: Thu Oct 18 14:12:53 JST 2018 33 | 34 | 35 | -------------------------------------------------------------------------------- /doc/gettingStartedV40_006b.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/doc/gettingStartedV40_006b.pdf -------------------------------------------------------------------------------- /doc/gettingStartedV40_006b.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/doc/gettingStartedV40_006b.zip -------------------------------------------------------------------------------- /doc/morphingWithSTRAIGHT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/doc/morphingWithSTRAIGHT.tar.gz -------------------------------------------------------------------------------- /doc/morphingWithSTRAIGHTe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/doc/morphingWithSTRAIGHTe.pdf -------------------------------------------------------------------------------- /doc/straightTechRep.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/doc/straightTechRep.pdf -------------------------------------------------------------------------------- /morphing_src/angryHai.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/morphing_src/angryHai.mat -------------------------------------------------------------------------------- /morphing_src/createMobject.m: --------------------------------------------------------------------------------
function mObject = createMobject
% Create an M-object (morphing object) with default field values
%   mObject = createMobject
%
%   The returned struct has empty analysis fields (waveform, F0, vuv,
%   spectrogram, aperiodicityIndex, anchors, extraction conditions) and
%   these defaults: samplingFrequency = 44100, frameUpdateInterval = 1 (ms),
%   maximumFrequencyPoints = 9.

% Designed and coded by Hideki Kawahara
% 25/February/2005
% 14/October/2005 Added creator information

mObject.date = datestr(now);
mObject.pwd = pwd;
mObject.waveform = [];
mObject.samplingFrequency = 44100; % default frequency
mObject.F0 = [];
mObject.vuv = [];
mObject.spectrogram = [];
mObject.aperiodicityIndex = [];
mObject.frameUpdateInterval = 1; % default frame is 1ms
mObject.anchorTimeLocation = [];
mObject.maximumFrequencyPoints = 9; % default max frequency anchor points
mObject.anchorFrequency = [];
mObject.F0extractionConditions = [];
mObject.SpectrumExtractionConditions = [];
mObject.creatorInformation = which('createMobject'); % path of the creating file
-------------------------------------------------------------------------------- /morphing_src/directSTRAIGHTmorphing.m: --------------------------------------------------------------------------------
function mObject3 = directSTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod)
% Morphing based on direct mixing of STRAIGHT parameters
% (Without time alignment)
%   mObject3 = directSTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod);
%
%   mObject1, mObject2 : M-objects whose F0, spectrogram and
%                        aperiodicityIndex fields are filled in
%   mRate              : weight given to mObject2; mObject1 is weighted
%                        by (1-mRate)
%   mixMethod          : 'linear' mixes spectrogram values directly,
%                        'log' mixes their logarithms
%   mObject3           : morphed M-object, or [] when the two inputs differ
%                        in samplingFrequency or frameUpdateInterval
%
%   An unknown mixMethod raises an error.

% Designed and coded by Hideki Kawahara
% 27/Feb./2005
% Copyright(c) 2005, Hideki Kawahara

mObject3 = [];
if mObject1.samplingFrequency ~= mObject2.samplingFrequency
    return
end
if mObject1.frameUpdateInterval ~= mObject2.frameUpdateInterval
    return
end

nw1 = length(mObject1.F0);
nw2 = length(mObject2.F0);
[nr1,nc1] = size(mObject1.spectrogram);
[nr2,nc2] = size(mObject2.spectrogram);
nr3 = max(nr1,nr2);
nc3 = max(max(nc1,nc2),max(nw1,nw2));
nsg = zeros(nr3,nc3);
nSgram = zeros(nr3,nc3); % accumulated mixing weight of each time-frequency bin
ap = zeros(nr3,nc3);
% One value per frame: zeros(nc3) would allocate an nc3-by-nc3 matrix.
f0 = zeros(1,nc3);
nVoice = zeros(1,nc3); % accumulated mixing weight of each voiced frame

% NOTE(review): bins covered by neither spectrogram (possible only when an
% F0 track is longer than both spectrograms) keep weight 0 and become NaN
% after the division below.
switch mixMethod
    case 'linear'
        nsg(1:nr1,1:nc1) = (1-mRate)*mObject1.spectrogram;
        nSgram(1:nr1,1:nc1) = nSgram(1:nr1,1:nc1)+(1-mRate);
        nsg(1:nr2,1:nc2) = mRate*mObject2.spectrogram+nsg(1:nr2,1:nc2);
        nSgram(1:nr2,1:nc2) = nSgram(1:nr2,1:nc2)+mRate;
        nsg = nsg./nSgram;
    case 'log'
        nsg(1:nr1,1:nc1) = (1-mRate)*log(mObject1.spectrogram);
        nSgram(1:nr1,1:nc1) = nSgram(1:nr1,1:nc1)+(1-mRate);
        nsg(1:nr2,1:nc2) = mRate*log(mObject2.spectrogram)+nsg(1:nr2,1:nc2);
        nSgram(1:nr2,1:nc2) = nSgram(1:nr2,1:nc2)+mRate;
        nsg = exp(nsg./nSgram);
    otherwise
        error('directSTRAIGHTmorphing:unknownMixMethod', ...
            'Unknown mixMethod "%s"; use ''linear'' or ''log''.',mixMethod);
end
ap(1:nr1,1:nc1) = (1-mRate)*mObject1.aperiodicityIndex;
ap(1:nr2,1:nc2) = mRate*mObject2.aperiodicityIndex+ap(1:nr2,1:nc2);

% F0 is mixed on the log-frequency axis, using voiced (F0>0) frames only;
% a frame voiced in just one input takes that input's F0.
f0(mObject1.F0>0) = (1-mRate)*log(mObject1.F0(mObject1.F0>0));
nVoice(mObject1.F0>0) = nVoice(mObject1.F0>0)+(1-mRate);
f0(mObject2.F0>0) = mRate*log(mObject2.F0(mObject2.F0>0))+f0(mObject2.F0>0);
nVoice(mObject2.F0>0) = nVoice(mObject2.F0>0)+mRate;
f0(nVoice>0) = exp(f0(nVoice>0)./nVoice(nVoice>0));

mObject3 = createMobject;
% Both inputs were verified to share these, so the result inherits them
% instead of keeping the createMobject defaults.
mObject3.samplingFrequency = mObject1.samplingFrequency;
mObject3.frameUpdateInterval = mObject1.frameUpdateInterval;
mObject3 = updateFieldOfMobject(mObject3,'spectrogram',nsg);
mObject3 = updateFieldOfMobject(mObject3,'aperiodicityIndex',ap);
mObject3 = updateFieldOfMobject(mObject3,'F0',f0);
-------------------------------------------------------------------------------- /morphing_src/displayMobject.m: --------------------------------------------------------------------------------
function displayMobject(mObject,fieldname,note)
% M-object information display
%   displayMobject(mObject,fieldname,note);
%
%   mObject   : M-object to display
%   fieldname : 'spectrogram', 'waveform', 'anchorFrequency' or
%               'anchorTimeLocation'; any other value draws nothing
%   note      : text placed at the head of the figure title
%
%   Each recognised fieldname opens a new figure. The two anchor views
%   draw the spectrogram and overlay the anchor times (dotted white
%   lines) and anchor frequencies (circle markers).

% Designed and coded by Hideki Kawahara
% 27/Feb./2005
% Copyright(c) 2005, Hideki Kawahara
% 05/Oct./2005 minor bug fix

fs = mObject.samplingFrequency;
tFrame = mObject.frameUpdateInterval;
switch fieldname
    case 'spectrogram'
        drawSpectrogramFigure(mObject,fs,tFrame,note);
    case 'waveform'
        figure
        x = mObject.waveform;
        timeSpan = (0:length(x)-1)/fs*1000; % sample times in ms
        plot(timeSpan,x);grid on;
        axis([timeSpan(1) timeSpan(end) 1.1*[min(x) max(x)]]);
        set(gca,'fontsize',14);
        xlabel('time (ms)');
        title([note ' time span 0 ' num2str(round(timeSpan(end)),8) ' (ms) ' datestr(now)]);
    case {'anchorFrequency', 'anchorTimeLocation'}
        drawSpectrogramFigure(mObject,fs,tFrame,note);
        if ~isempty(mObject.anchorTimeLocation)
            hold on;
            for ii = 1:length(mObject.anchorTimeLocation)
                anchorTime = mObject.anchorTimeLocation(ii);
                hh = plot(anchorTime*[1 1],[0 fs/2],'w:');
                set(hh,'linewidth',2);
                % only positive entries are anchor frequencies; zeros are unused slots
                anchorFrequencyVector = mObject.anchorFrequency(ii,mObject.anchorFrequency(ii,:)>0); % 05/Oct./2005 HK
                for jj = 1:length(anchorFrequencyVector)
                    hh = plot(anchorTime,anchorFrequencyVector(jj),'ok');
                    set(hh,'markersize',9);
                    set(hh,'linewidth',2);
                    hh = plot(anchorTime,anchorFrequencyVector(jj),'.w');
                    set(hh,'markersize',7);
                    set(hh,'linewidth',4);
                end
            end
            hold off;
        end
end

function drawSpectrogramFigure(mObject,fs,tFrame,note)
% Open a new figure and draw the spectrogram in dB, floored at 70 dB below its maximum.
figure
ncolumn = size(mObject.spectrogram,2);
timeSpan = [0 (ncolumn-1)*tFrame];
dBsgram = 20*log10(mObject.spectrogram);
maxSgramdB = max(max(dBsgram));
imagesc(timeSpan, [0 fs/2],max(dBsgram,maxSgramdB-70));
axis('xy');
set(gca,'fontsize',14);
xlabel('time (ms)');
ylabel('frequency (Hz)');
title([note ' time span 0 ' num2str(timeSpan(2),10) ' (ms) ' datestr(now)]);

-------------------------------------------------------------------------------- /morphing_src/executeSTRAIGHTanalysisM.m: --------------------------------------------------------------------------------
function mObject = executeSTRAIGHTanalysisM(mObject,optionalParameters)
% STRAIGHT analysis for mObject
%   mObject = executeSTRAIGHTanalysisM(mObject,optionalParameters);
%
%   Analyses mObject.waveform at mObject.samplingFrequency with
%   exstraightsource and exstraightspec and stores F0, vuv, spectrogram,
%   aperiodicityIndex, frameUpdateInterval and both sets of extraction
%   conditions in mObject. optionalParameters, when given, is passed
%   unchanged to both analysis functions.

% Designed and coded by Hideki Kawahara
% 26/Feb./2005
% Copyright(c) 2005, Hideki Kawahara
% 20/March/2006 bug fix by T. Takahashi and Kawahara

x = mObject.waveform;
fs = mObject.samplingFrequency;
if nargin>1
    [f0raw,ap,prmF0] = exstraightsource(x,fs,optionalParameters);
    [n3sgram,analysisParamsSp] = exstraightspec(x,f0raw,fs,optionalParameters);
else
    [f0raw,ap,prmF0] = exstraightsource(x,fs);
    [n3sgram,analysisParamsSp] = exstraightspec(x,f0raw,fs);
end
% Restrict the test to workspace variables: a bare exist('vuv') is also
% true for any file or function named vuv on the path.
if exist('vuv','var') % reserved for extension
    mObject.vuv = vuv;
else
    mObject.vuv = (f0raw ~= 0);
end
% The analysis outputs can differ in frame count; trim all to the shortest.
temporalIndexLength = min([length(f0raw),size(n3sgram,2),size(ap,2),length(mObject.vuv)]);
mObject.F0 = f0raw(1:temporalIndexLength);
mObject.spectrogram = n3sgram(:,1:temporalIndexLength);
mObject.aperiodicityIndex = ap(:,1:temporalIndexLength);
mObject.vuv = mObject.vuv(1:temporalIndexLength);
mObject.frameUpdateInterval = prmF0.F0frameUpdateInterval;
mObject.F0extractionConditions = prmF0;
mObject.SpectrumExtractionConditions = analysisParamsSp;
--------------------------------------------------------------------------------
/morphing_src/executeSTRAIGHTanalysisMExt.m: --------------------------------------------------------------------------------
function mObject = executeSTRAIGHTanalysisMExt(mObject,optionalParameters)
% STRAIGHT analysis for mObject, with MulticueF0v14 as the F0 extractor
%   mObject = executeSTRAIGHTanalysisMExt(mObject,optionalParameters);
%
%   Analyses mObject.waveform at mObject.samplingFrequency and stores
%   F0, vuv, spectrogram, aperiodicityIndex, frameUpdateInterval and the
%   three sets of analysis conditions in mObject. optionalParameters,
%   when given, is passed unchanged to every analysis function.

% Designed and coded by Hideki Kawahara
% 26/Feb./2005
% Copyright(c) 2005, Hideki Kawahara
% 20/March/2006 bug fix by T. Takahashi and Kawahara
% 16/Aug./2008 extended for use MulticueF0 as default

signal = mObject.waveform;
fs = mObject.samplingFrequency;

% Forward the optional argument only when the caller supplied it, so the
% analysis functions see the same nargin as in a direct call.
if nargin>1
    extraArgs = {optionalParameters};
else
    extraArgs = {};
end
%[f0raw,ap,prmF0] = exstraightsource(signal,fs,extraArgs{:});
[f0raw,vuv,auxouts,prmF0] = MulticueF0v14(signal,fs,extraArgs{:});
[n3sgram,analysisParamsSp] = exstraightspec(signal,f0raw,fs,extraArgs{:});
[ap,analysisParams] = exstraightAPind(signal,fs,f0raw,extraArgs{:});

if ~exist('vuv') % reserved for extension
    mObject.vuv = (f0raw ~= 0);
else
    mObject.vuv = vuv;
end

% The analysis outputs can differ in frame count; trim all to the shortest.
nFrames = min([length(f0raw),size(n3sgram,2),size(ap,2),length(mObject.vuv)]);
keep = 1:nFrames;
mObject.F0 = f0raw(keep);
mObject.spectrogram = n3sgram(:,keep);
mObject.aperiodicityIndex = ap(:,keep);
mObject.vuv = mObject.vuv(keep);
mObject.frameUpdateInterval = prmF0.F0frameUpdateInterval;
mObject.F0extractionConditions = prmF0;
mObject.SpectrumExtractionConditions = analysisParamsSp;
mObject.AperiodicityAnalysisParams = analysisParams;
-------------------------------------------------------------------------------- /morphing_src/executeSTRAIGHTsynthesisM.m:
--------------------------------------------------------------------------------
function [sy,prmS] = executeSTRAIGHTsynthesisM(mObject,optionalParameters)
% STRAIGHT synthesis from mObject
%   [sy,prmS] = executeSTRAIGHTsynthesisM(mObject,optionalParameters);
%
%   Synthesizes from mObject.F0, mObject.spectrogram and
%   mObject.aperiodicityIndex at mObject.samplingFrequency using
%   exstraightsynth, and returns its two outputs. optionalParameters,
%   when given, is passed unchanged to exstraightsynth.

% Designed and coded by Hideki Kawahara
% 27/Feb./2005
% Copyright(c) 2005, Hideki Kawahara
% 14/March/2005 bug fix on optional paramters
% 10/June/2006 extension for the new F0 extractor

fs = mObject.samplingFrequency;
f0raw = mObject.F0;
% Zero the F0 of unvoiced frames, but only when vuv matches F0 in length.
if isfield(mObject,'vuv')
    if length(mObject.vuv) == length(mObject.F0)
        f0raw = f0raw.*mObject.vuv;
    end
end
n3sgram = mObject.spectrogram;
ap = mObject.aperiodicityIndex;
if nargin>1
    [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs,optionalParameters);
else
    [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs);
end
-------------------------------------------------------------------------------- /morphing_src/fixDummyObjectSize.m: --------------------------------------------------------------------------------
function dummyObject = fixDummyObjectSize(dummyObject,originalObject)
% Enlarge a dummy M-object so that its last anchor keeps the end margin
% of the original object
%   dummyObject = fixDummyObjectSize(dummyObject,originalObject);
%
%   The end margin is the time between the last anchor of originalObject
%   and the end of its spectrogram. When the spectrogram of dummyObject
%   is too short to leave that margin after its own last anchor, the
%   spectrogram, aperiodicityIndex, F0 and vuv of dummyObject are
%   replaced by all-ones arrays of the required number of frames.
%   Otherwise dummyObject is returned unchanged.

frameUpdateInterval = dummyObject.frameUpdateInterval;
endMargin = size(originalObject.spectrogram,2)*frameUpdateInterval ...
    -max(originalObject.anchorTimeLocation);
requiredDuration = max(dummyObject.anchorTimeLocation)+endMargin; % same time unit as the anchors
if size(dummyObject.spectrogram,2)*frameUpdateInterval < requiredDuration
    % Convert the duration to a whole number of frames: ones() rejects
    % non-integer sizes, and a duration equals a frame count only when
    % frameUpdateInterval is 1.
    dummyFrameSize = ceil(requiredDuration/frameUpdateInterval);
    dummyFrequencySize = size(originalObject.spectrogram,1);
    dummyObject.spectrogram = ones(dummyFrequencySize,dummyFrameSize);
    dummyObject.aperiodicityIndex = ones(dummyFrequencySize,dummyFrameSize);
    dummyObject.F0 = ones(1,dummyFrameSize);
    dummyObject.vuv = ones(1,dummyFrameSize);
end
--------------------------------------------------------------------------------
/morphing_src/makeLogarithmicLevelDifferenceBasedOnPeaks.m: --------------------------------------------------------------------------------
function mObject = makeLogarithmicLevelDifferenceBasedOnPeaks(mObject,levelDifferenceTable)
% NOTE(review): only the signature is present in this file. As written the
% function returns mObject unchanged and does not use levelDifferenceTable.
-------------------------------------------------------------------------------- /morphing_src/neutralHai.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/morphing_src/neutralHai.mat -------------------------------------------------------------------------------- /morphing_src/setAnchorFromRawAnchor.m: --------------------------------------------------------------------------------
function mObject = setAnchorFromRawAnchor(mObject,rawAnchor)
% Set anchor points using raw anchor information
%   mObject = setAnchorFromRawAnchor(mObject,rawAnchor);
%
%   mObject   : M-object; mObject.maximumFrequencyPoints limits the number
%               of frequency anchors kept for each time anchor
%   rawAnchor : N-by-2 array, one raw anchor per row: column 1 is the time
%               location and column 2 the frequency
%
%   Raw anchors are sorted by time. A new group starts whenever the gap to
%   the previous raw anchor exceeds TIMINGmARGIN. On return
%   mObject.anchorTimeLocation holds one time per group (the mean time of
%   the group's kept members), and mObject.anchorFrequency holds one row
%   per group with the group's frequencies in ascending order, padded
%   with zeros.

% Designed and coded by Hideki Kawahara
% 27/Feb./2005
% Copyright(c) 2005, Hideki Kawahara

TIMINGmARGIN = 10; % threshold for merging location
nFrequency = mObject.maximumFrequencyPoints;
if isempty(rawAnchor)
    % Nothing to group: return empty anchors instead of failing on indexing.
    mObject.anchorTimeLocation = zeros(0,1);
    mObject.anchorFrequency = zeros(0,nFrequency);
    return
end
[dm1,indsrt] = sort(rawAnchor(:,1));
sortedAnchor = rawAnchor(indsrt,1);
sortedFrequency = rawAnchor(indsrt,2);
indexNumber = 1:length(sortedAnchor);

% Index of the first member of each group; -100 makes the first raw
% anchor always start a group.
anchorIndex = indexNumber(diff([-100;sortedAnchor])>TIMINGmARGIN);
anchorCandidate = sortedAnchor(anchorIndex);
mObject.anchorTimeLocation = anchorCandidate;

nAnchor = length(anchorCandidate);
% Terminator: a sentinel time later than every raw anchor, with the
% matching one-past-the-end index. Using anchorIndex(end)+1 here would end
% the last group after its first member and drop the rest.
sortedAnchor(end+1) = sortedAnchor(end)+1;
anchorIndex(end+1) = length(sortedAnchor);
frequencyAnchor = zeros(nAnchor,nFrequency);
for ii = 1:nAnchor
    iFrequency = 0;
    anchorLocation = 0;
    nextGroupTime = sortedAnchor(anchorIndex(ii+1));
    for jj = 1:min(nFrequency,anchorIndex(ii+1)-anchorIndex(ii)+1)
        memberIndex = (jj-1)+anchorIndex(ii);
        % The strict comparison excludes the first member of the next group
        % (or the terminator), which the loop bound also reaches.
        if sortedAnchor(memberIndex) < nextGroupTime
            frequencyAnchor(ii,jj) = sortedFrequency(memberIndex);
            anchorLocation = anchorLocation+sortedAnchor(memberIndex);
            iFrequency = iFrequency+1;
        end
    end
    if iFrequency>1
        [dmy1,indsrt] = sort(frequencyAnchor(ii,1:iFrequency));
        frequencyAnchor(ii,1:iFrequency) = frequencyAnchor(ii,indsrt);
        mObject.anchorTimeLocation(ii) = anchorLocation/iFrequency;
    end
end
mObject.anchorFrequency = frequencyAnchor;
-------------------------------------------------------------------------------- /morphing_src/timeAlignedDirectSTRAIGHTmorphing.m: -------------------------------------------------------------------------------- 1 | function mObject3 = timeAlignedDirectSTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod); 2 | % Morphing based on time-aligned mixing of STRAIGHT parameters 3 | % mObject3 = timeAlignedDirectSTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod); 4 | 5 | % Designed and coded by Hideki Kawahara 6 | % 28/Feb./2005 7 | % Copyright(c) 2005, Hideki Kawahara 8 | 9 | mObject3 = checkForSimilarity(mObject1,mObject2); 10 | if length(mObject3) ==0;return;end; 11 | dtFrame = mObject1.frameUpdateInterval; 12 | endLocation1 = (length(mObject1.F0)-1)*dtFrame; % in ms 13 | endLocation2 = (length(mObject2.F0)-1)*dtFrame; % in ms 14 | timeAnchor1 = [0;mObject1.anchorTimeLocation;endLocation1]; 15 | timeAnchor2 = [0;mObject2.anchorTimeLocation;endLocation2]; 16 | locationOn1 = (0:length(mObject1.F0)-1)*dtFrame; 17 | locationOn2 = (0:length(mObject2.F0)-1)*dtFrame; 18 | mapFrom1to2 = interp1(timeAnchor1,timeAnchor2,locationOn1); 19 | [nr1,nc1] = size(mObject1.spectrogram); 20 | [nr2,nc2] = size(mObject2.spectrogram); 21 | 22 | %---- mixing on mObject1's time axis 23 | nAxis1 = length(locationOn1); 24 | nAxis2 = length(locationOn2); 25 | morphedF0 = zeros(nAxis1,1); 26 | morphedAp = zeros(nr1,nAxis1); 27 | morphedSgram = zeros(nr1,nAxis1); 28 | weightSumF0 = 
zeros(nAxis1,1); 29 | for ii=1:nAxis1 30 | mappedIndexOn2 = mapFrom1to2(ii)/dtFrame+1; 31 | iFloor = floor(mappedIndexOn2); 32 | iFraction = mappedIndexOn2-iFloor; 33 | dAp = iFraction*(mObject2.aperiodicityIndex(:,min(iFloor+1,nAxis2))-mObject2.aperiodicityIndex(:,iFloor)); 34 | morphedAp(:,ii) = (1-mRate)*mObject1.aperiodicityIndex(:,ii)+mRate*(mObject2.aperiodicityIndex(:,iFloor)+dAp); 35 | switch mixMethod 36 | case 'linear' 37 | dSgram = iFraction*(mObject2.spectrogram(:,min(iFloor+1,nAxis2))-mObject2.spectrogram(:,iFloor)); 38 | morphedSgram(:,ii) = (1-mRate)*mObject1.spectrogram(:,ii)+mRate*(mObject2.spectrogram(:,iFloor)+dSgram); 39 | case 'log' 40 | dSgram = iFraction*(log(mObject2.spectrogram(:,min(iFloor+1,nAxis2)))-log(mObject2.spectrogram(:,iFloor))); 41 | tmp = (1-mRate)*log(mObject1.spectrogram(:,ii))+mRate*(log(mObject2.spectrogram(:,iFloor))+dSgram); 42 | morphedSgram(:,ii) = exp(tmp); 43 | end; 44 | if mObject1.F0(ii)>0 45 | morphedF0(ii) = (1-mRate)*log(mObject1.F0(ii)); 46 | weightSumF0(ii) = (1-mRate); 47 | end; 48 | if (mObject2.F0(iFloor)>0) & (mObject2.F0(min(iFloor+1,nAxis2))>0) 49 | dF0 = iFraction*(log(mObject2.F0(min(iFloor+1,nAxis2)))-log(mObject2.F0(iFloor))); 50 | morphedF0(ii) = mRate*(log(mObject2.F0(iFloor))+dF0)+morphedF0(ii); 51 | weightSumF0(ii) = weightSumF0(ii)+mRate; 52 | end; 53 | end; 54 | morphedF0(weightSumF0>0) = exp(morphedF0(weightSumF0>0)./weightSumF0(weightSumF0>0)); 55 | 56 | %----- mapping back onto morphed time axis 57 | timeAnchorMorph = (1-mRate)*timeAnchor1 + mRate*timeAnchor2; 58 | locationOnMorph = (0:(timeAnchorMorph(end)/dtFrame))*dtFrame; 59 | mapFormMorphTo1 = interp1(timeAnchorMorph,timeAnchor1,locationOnMorph); 60 | nAxisMorph = length(locationOnMorph); 61 | morphedApOnMorph = zeros(nr1,nAxisMorph); 62 | morphedSgramOnMorph = zeros(nr1,nAxisMorph); 63 | morphedF0onMorph = zeros(nAxisMorph,1); 64 | for ii=1:nAxisMorph 65 | mappedIndexOn1 = mapFormMorphTo1(ii)/dtFrame+1; 66 | iFloor = 
floor(mappedIndexOn1); 67 | iFraction = mappedIndexOn1-iFloor; 68 | morphedApOnMorph(:,ii) = morphedAp(:,iFloor) ... 69 | +iFraction*(morphedAp(:,min(iFloor+1,nAxis1))-morphedAp(:,iFloor)); 70 | morphedSgramOnMorph(:,ii) = morphedSgram(:,iFloor) ... 71 | +iFraction*(morphedSgram(:,min(iFloor+1,nAxis1))-morphedSgram(:,iFloor)); 72 | if (morphedF0(iFloor)>0) & (morphedF0(min(iFloor+1,nAxis1))>0) 73 | dF0 = iFraction*(morphedF0(min(iFloor+1,nAxis1))-morphedF0(iFloor)); 74 | morphedF0onMorph(ii) = morphedF0(iFloor)+dF0; 75 | end; 76 | end; 77 | mObject3.F0 = morphedF0onMorph; 78 | mObject3.aperiodicityIndex = morphedApOnMorph; 79 | mObject3.spectrogram = morphedSgramOnMorph; 80 | mObject3.anchorTimeLocation = timeAnchorMorph(2:end-1); 81 | mObject3.anchorFrequency = (1-mRate)*mObject1.anchorFrequency+mRate*mObject2.anchorFrequency; 82 | %mObject3 = morphedAp; % This line is a dummy. 83 | 84 | %%% ------ Internal function to check for object's similarity 85 | function mObject3 = checkForSimilarity(mObject1,mObject2) 86 | mObject3 = []; 87 | if mObject1.samplingFrequency ~= mObject2.samplingFrequency;mObject3 = [];return;end; 88 | if mObject1.frameUpdateInterval ~= mObject2.frameUpdateInterval;mObject3 = [];return;end; 89 | if length(mObject1.anchorTimeLocation) ~= length(mObject2.anchorTimeLocation);mObject3 = [];return;end; 90 | nAnchor = length(mObject1.anchorTimeLocation); 91 | for ii=1:nAnchor % check for similarity of anchor structure 92 | frequencyAnchor1 = mObject1.anchorFrequency; 93 | frequencyAnchor2 = mObject2.anchorFrequency; 94 | if (sum(frequencyAnchor1>0) ~= sum(frequencyAnchor2>0)) | ... 
95 | (sum(frequencyAnchor1<0) ~= sum(frequencyAnchor2<0)) 96 | return; 97 | end; 98 | end; 99 | mObject3 = createMobject; 100 | -------------------------------------------------------------------------------- /morphing_src/timeFrequencySTRAIGHTmorphing.m: -------------------------------------------------------------------------------- 1 | function mObject3 = timeFrequencySTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod); 2 | % Morphing based on STRAIGHT parameters 3 | % mObject3 = timeFrequencySTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod); 4 | 5 | % Designed and coded by Hideki Kawahara 6 | % 28/Feb./2005 7 | % Copyright(c) 2005, Hideki Kawahara 8 | % 14/March/2005 bug fix on sampling frequency 9 | % 01/Oct./2005 bug fix on similarity check 10 | % 04/Oct./2005 partial morphing extension 11 | % 18/Oct./2005 direct differential manipulation and API cange 12 | % 29/Jan./2006 bug fix on boundary conditions 13 | 14 | switch nargin 15 | case 0 16 | mObject3.morphingObject = createMobject; 17 | mixRate.F0 = 0; 18 | mixRate.spectrum = 0; 19 | mixRate.aperiodicity = 0; 20 | mixRate.coordinate = 0; 21 | mObject3.mixRate = mixRate; 22 | mObject3.mixMethods = {'linear','log','differentialLogarithm'}; 23 | return 24 | end; 25 | mObject3 = checkForSimilarity(mObject1,mObject2); 26 | mixRate = checkForMorphingConditions(mRate); 27 | mObject1 = checkForIntegrity(mObject1); 28 | mObject2 = checkForIntegrity(mObject2); 29 | fs = mObject1.samplingFrequency; 30 | if length(mObject3) ==0;return;end; 31 | dtFrame = mObject1.frameUpdateInterval; 32 | endLocation1 = (length(mObject1.F0)-1)*dtFrame; % in ms 33 | endLocation2 = (length(mObject2.F0)-1)*dtFrame; % in ms 34 | timeAnchor1 = [0;mObject1.anchorTimeLocation;endLocation1]; 35 | timeAnchor2 = [0;mObject2.anchorTimeLocation;endLocation2]; 36 | locationOn1 = (0:length(mObject1.F0)-1)*dtFrame; 37 | locationOn2 = (0:length(mObject2.F0)-1)*dtFrame; 38 | mapFrom1to2 = interp1(timeAnchor1,timeAnchor2,locationOn1); 39 | [nr1,nc1] = 
size(mObject1.spectrogram); 40 | [nr2,nc2] = size(mObject2.spectrogram); 41 | 42 | %---- initialize frequency mapping function 43 | fmapFrom1to2OnTime1 = generateFrequencyMap(mObject1,mObject2); 44 | 45 | %---- mixing on mObject1's time axis 46 | nAxis1 = length(locationOn1); 47 | nAxis2 = length(locationOn2); 48 | morphedF0 = zeros(nAxis1,1); 49 | morphedAp = zeros(nr1,nAxis1); 50 | morphedSgram = zeros(nr1,nAxis1); 51 | weightSumF0 = zeros(nAxis1,1); 52 | for ii=1:nAxis1 53 | mappedIndexOn2 = mapFrom1to2(ii)/dtFrame+1; 54 | iFloor = floor(mappedIndexOn2); 55 | iFraction = mappedIndexOn2-iFloor; 56 | fIndex = floor(fmapFrom1to2OnTime1(:,ii)/fs*2*(nr1-1))+1; 57 | dAp = iFraction*(mObject2.aperiodicityIndex(:,min(iFloor+1,nAxis2))-mObject2.aperiodicityIndex(:,min(iFloor,nAxis2))); 58 | ap2on2faxis = mObject2.aperiodicityIndex(:,min(iFloor,nAxis2))+dAp; 59 | ap2on1faxis = ap2on2faxis(fIndex); 60 | morphedAp(:,ii) = (1-mixRate.aperiodicity)*mObject1.aperiodicityIndex(:,ii)+mixRate.aperiodicity*ap2on1faxis; %04/Oct/2005 HK 61 | switch mixMethod 62 | case 'linear' 63 | dSgram = iFraction*(mObject2.spectrogram(:,min(iFloor+1,nAxis2))-mObject2.spectrogram(:,min(iFloor,nAxis2))); 64 | sgram2on2faxis = mObject2.spectrogram(:,min(iFloor,nAxis2))+dSgram; 65 | sgram2on1faxis = sgram2on2faxis(fIndex); 66 | morphedSgram(:,ii) = (1-mixRate.spectrum)*mObject1.spectrogram(:,ii)+mixRate.spectrum*sgram2on1faxis; 67 | case 'log' 68 | dSgram = iFraction*(log(mObject2.spectrogram(:,min(iFloor+1,nAxis2)))-log(mObject2.spectrogram(:,min(iFloor,nAxis2)))); 69 | sgram2on2faxis = log(mObject2.spectrogram(:,min(iFloor,nAxis2)))+dSgram; 70 | sgram2on1faxis = sgram2on2faxis(fIndex); 71 | tmp = (1-mixRate.spectrum)*log(mObject1.spectrogram(:,ii))+mixRate.spectrum*sgram2on1faxis; 72 | morphedSgram(:,ii) = exp(tmp); 73 | case 'differentialLogarithm' 74 | dSgram = iFraction*(mObject2.spectrogram(:,min(iFloor+1,nAxis2))-mObject2.spectrogram(:,min(iFloor,nAxis2))); 75 | sgram2on2faxis = 
mObject2.spectrogram(:,min(iFloor,nAxis2))+dSgram; 76 | sgram2on1faxis = sgram2on2faxis(fIndex); 77 | tmp = (1-mixRate.spectrum)*log(mObject1.spectrogram(:,ii))+mixRate.spectrum*sgram2on1faxis; 78 | morphedSgram(:,ii) = exp(tmp); 79 | end; 80 | if mObject1.F0(ii)>0 81 | morphedF0(ii) = (1-mixRate.F0)*log(mObject1.F0(ii)); 82 | weightSumF0(ii) = (1-mixRate.F0); 83 | end; 84 | if (mObject2.F0(iFloor)>0) & (mObject2.F0(min(iFloor+1,nAxis2))>0) 85 | dF0 = iFraction*(log(mObject2.F0(min(iFloor+1,nAxis2)))-log(mObject2.F0(min(iFloor,nAxis2)))); 86 | morphedF0(ii) = mixRate.F0*(log(mObject2.F0(min(iFloor,nAxis2)))+dF0)+morphedF0(ii); 87 | weightSumF0(ii) = weightSumF0(ii)+mixRate.F0; 88 | end; 89 | end; 90 | morphedF0(weightSumF0>0) = exp(morphedF0(weightSumF0>0)./weightSumF0(weightSumF0>0)); 91 | 92 | %----- mapping back onto morphed time axis 93 | timeAnchorMorph = (1-mixRate.coordinate)*timeAnchor1 + mixRate.coordinate*timeAnchor2; 94 | locationOnMorph = (0:(timeAnchorMorph(end)/dtFrame))*dtFrame; 95 | mapFormMorphTo1 = interp1(timeAnchorMorph,timeAnchor1,locationOnMorph); 96 | nAxisMorph = length(locationOnMorph); 97 | morphedApOnMorph = zeros(nr1,nAxisMorph); 98 | morphedSgramOnMorph = zeros(nr1,nAxisMorph); 99 | morphedF0onMorph = zeros(nAxisMorph,1); 100 | %----- set place holders 101 | mObject3.samplingFrequency = fs; 102 | mObject3.F0 = morphedF0onMorph; 103 | mObject3.aperiodicityIndex = morphedApOnMorph; 104 | mObject3.spectrogram = morphedSgramOnMorph; 105 | mObject3.anchorTimeLocation = timeAnchorMorph(2:end-1); 106 | mObject3.anchorFrequency = (1-mixRate.coordinate)*mObject1.anchorFrequency+mixRate.coordinate*mObject2.anchorFrequency; 107 | %------ nitialize frequency mapping function 108 | fmapFromMorphto1OnTimeMorph = generateFrequencyMap(mObject3,mObject1); 109 | for ii=1:nAxisMorph 110 | mappedIndexOn1 = mapFormMorphTo1(ii)/dtFrame+1; 111 | iFloor = floor(mappedIndexOn1); 112 | iFraction = mappedIndexOn1-iFloor; 113 | fIndex = 
floor(fmapFromMorphto1OnTimeMorph(:,ii)/fs*2*(nr1-1))+1; 114 | morphedApOnMorph(:,ii) = morphedAp(fIndex,iFloor) ... 115 | +iFraction*(morphedAp(fIndex,min(iFloor+1,nAxis1))-morphedAp(fIndex,iFloor)); 116 | morphedSgramOnMorph(:,ii) = morphedSgram(fIndex,iFloor) ... 117 | +iFraction*(morphedSgram(fIndex,min(iFloor+1,nAxis1))-morphedSgram(fIndex,iFloor)); 118 | if (morphedF0(iFloor)>0) & (morphedF0(min(iFloor+1,nAxis1))>0) 119 | dF0 = iFraction*(morphedF0(min(iFloor+1,nAxis1))-morphedF0(iFloor)); 120 | morphedF0onMorph(ii) = morphedF0(iFloor)+dF0; 121 | end; 122 | end; 123 | mObject3.F0 = morphedF0onMorph; 124 | mObject3.aperiodicityIndex = morphedApOnMorph; 125 | mObject3.spectrogram = morphedSgramOnMorph; 126 | mObject3.anchorTimeLocation = timeAnchorMorph(2:end-1); 127 | %mObject3.anchorFrequency = (1-mRate)*mObject1.anchorFrequency+mRate*mObject2.anchorFrequency; 128 | %mObject3 = fmapFromMorphto1OnTimeMorph; % This line is a dummy. 129 | return; 130 | 131 | %%% ------ Internal function to check for object's similarity 132 | function mObject3 = checkForSimilarity(mObject1,mObject2) 133 | mObject3 = []; 134 | if mObject1.samplingFrequency ~= mObject2.samplingFrequency;mObject3 = [];return;end; 135 | if mObject1.frameUpdateInterval ~= mObject2.frameUpdateInterval;mObject3 = [];return;end; 136 | if length(mObject1.anchorTimeLocation) ~= length(mObject2.anchorTimeLocation);mObject3 = [];return;end; 137 | nAnchor = length(mObject1.anchorTimeLocation); 138 | for ii=1:nAnchor % check for similarity of anchor structure 139 | frequencyAnchor1 = mObject1.anchorFrequency(ii,:)';% 01/Oct./2005 by HK 140 | frequencyAnchor2 = mObject2.anchorFrequency(ii,:)';% 01/Oct./2005 by HK 141 | if (sum(frequencyAnchor1>0) ~= sum(frequencyAnchor2>0)) | ... 142 | (sum(frequencyAnchor1<0) ~= sum(frequencyAnchor2<0)) 143 | display('Warning!! 
Object structures are inconsistent!'); % 01/Oct./2005 by HK 144 | return; 145 | end; 146 | end; 147 | mObject3 = createMobject; 148 | m0bject3.samplingFrequency = mObject1.samplingFrequency; 149 | m0bject3.frameUpdateInterval = mObject1.frameUpdateInterval; 150 | return; 151 | 152 | %%%-------- 153 | function mixRate = checkForMorphingConditions(mRate); 154 | % 04/Oct./2005 added by HK 155 | 156 | if ~isstruct(mRate) 157 | mixRate.F0 = mRate; 158 | mixRate.spectrum = mRate; 159 | mixRate.aperiodicity = mRate; 160 | mixRate.coordinate = mRate; 161 | return; 162 | end; 163 | mixRate.F0 = mRate.F0; 164 | mixRate.spectrum = mRate.spectrum; 165 | mixRate.aperiodicity = mRate.aperiodicity; 166 | mixRate.coordinate = mRate.coordinate; 167 | return; 168 | 169 | %%%-------- 170 | function fmapFrom1to2OnTime1 = generateFrequencyMap(mObject1,mObject2); 171 | 172 | dtFrame = mObject1.frameUpdateInterval; 173 | endLocation1 = (length(mObject1.F0)-1)*dtFrame; % in ms 174 | timeAnchor1 = [0;mObject1.anchorTimeLocation;endLocation1]; 175 | locationOn1 = (0:length(mObject1.F0)-1)*dtFrame; 176 | fs = mObject1.samplingFrequency; 177 | [nr1,nc1] = size(mObject1.spectrogram); 178 | nAnchor = length(mObject1.anchorTimeLocation); 179 | fmapFrom1to2 = zeros(nr1,nAnchor); 180 | frequencyAxis = (0:nr1-1)'/(nr1-1)*fs/2; 181 | numberOfFrequencyAnchors = zeros(nAnchor,1); 182 | for ii=1:nAnchor 183 | frequencyAnchor1 = mObject1.anchorFrequency(ii,:)'; 184 | frequencyAnchor1 = [0;frequencyAnchor1(frequencyAnchor1>0);fs/2]; 185 | numberOfFrequencyAnchors(ii) = length(frequencyAnchor1(frequencyAnchor1>0)); 186 | frequencyAnchor2 = mObject2.anchorFrequency(ii,:)'; 187 | frequencyAnchor2 = [0;frequencyAnchor2(frequencyAnchor2>0);fs/2]; 188 | fmapFrom1to2(:,ii) = interp1(frequencyAnchor1,frequencyAnchor2,frequencyAxis); 189 | end; 190 | for ii=1:nAnchor 191 | if numberOfFrequencyAnchors(ii) == 1 192 | if numberOfFrequencyAnchors(min(ii+1,nAnchor)) > 1 193 | fmapFrom1to2(:,ii) = 
fmapFrom1to2(:,min(ii+1,nAnchor)); 194 | elseif numberOfFrequencyAnchors(max(ii-1,1)) > 1 195 | fmapFrom1to2(:,ii) = fmapFrom1to2(:,max(ii-1,1)); 196 | end; 197 | end; 198 | end; 199 | fmapFrom1to2 = [fmapFrom1to2(:,1) fmapFrom1to2 fmapFrom1to2(:,nAnchor)]; 200 | fmapFrom1to2OnTime1 = interp1(timeAnchor1,fmapFrom1to2',locationOn1)'; 201 | return; 202 | 203 | %%%------- 204 | function cleanedUpObject = checkForIntegrity(inputObject); 205 | 206 | maximumIndex = max([length(inputObject.F0), ... 207 | size(inputObject.spectrogram,2) ... 208 | size(inputObject.aperiodicityIndex,2)]); 209 | if length(inputObject.F0) < maximumIndex 210 | inputObject.F0 = [inputObject.F0(:);inputObject.F0(end)*ones(maximumIndex - length(inputObject.F0),1)]; 211 | end; 212 | if size(inputObject.spectrogram,2) < maximumIndex 213 | numberOfFillIn = maximumIndex-size(inputObject.spectrogram,2); 214 | inputObject.spectrogram = [inputObject.spectrogram inputObject.spectrogram(:,end)*ones(1,numberOfFillIn)]; 215 | end; 216 | if size(inputObject.aperiodicityIndex,2) < maximumIndex 217 | numberOfFillIn = maximumIndex-size(inputObject.aperiodicityIndex,2); 218 | inputObject.aperiodicityIndex = [inputObject.aperiodicityIndex inputObject.aperiodicityIndex(:,end)*ones(1,numberOfFillIn)]; 219 | end; 220 | cleanedUpObject = inputObject; 221 | 222 | -------------------------------------------------------------------------------- /morphing_src/timeFrequencySTRAIGHTmorphingExt.m: -------------------------------------------------------------------------------- 1 | function mObject3 = timeFrequencySTRAIGHTmorphingExt(mObject1,mObject2,mRate,mixMethod); 2 | % Morphing based on STRAIGHT parameters 3 | % mObject3 = timeFrequencySTRAIGHTmorphing(mObject1,mObject2,mRate,mixMethod); 4 | 5 | % Designed and coded by Hideki Kawahara 6 | % 28/Feb./2005 7 | % Copyright(c) 2005, Hideki Kawahara 8 | % 14/March/2005 bug fix on sampling frequency 9 | % 01/Oct./2005 bug fix on similarity check 10 | % 04/Oct./2005 partial 
morphing extension 11 | % 18/Oct./2005 direct differential manipulation and API cange 12 | % 29/Jan./2006 bug fix on boundary conditions 13 | % 24/Oct./2006 modificaton of definition 14 | 15 | switch nargin 16 | case 0 17 | mObject3.morphingObject = createMobject; 18 | mixRate.F0 = 0; 19 | mixRate.spectrum = 0; 20 | mixRate.aperiodicity = 0; 21 | mixRate.timeCoordinate = 0; 22 | mixRate.freqCoordinate = 0; 23 | mObject3.mixRate = mixRate; 24 | mObject3.mixMethods = {'linear','log','differentialLogarithm'}; 25 | return 26 | end; 27 | if ~isfield(mObject1,'vuv') 28 | mObject1.vuv = (mObject1.F0>0); 29 | elseif length(mObject1.vuv) == 0 30 | mObject1.vuv = (mObject1.F0>0); 31 | end; 32 | if ~isfield(mObject2,'vuv') 33 | mObject2.vuv = (mObject2.F0>0); 34 | elseif length(mObject2.vuv) == 0 35 | mObject2.vuv = (mObject2.F0>0); 36 | end; 37 | mObject3 = checkForSimilarity(mObject1,mObject2); 38 | mixRate = checkForMorphingConditions(mRate); 39 | mObject1 = checkForIntegrity(mObject1); 40 | mObject2 = checkForIntegrity(mObject2); 41 | fs = mObject1.samplingFrequency; 42 | if length(mObject3) ==0;return;end; 43 | dtFrame = mObject1.frameUpdateInterval; 44 | endLocation1 = (length(mObject1.F0)-1)*dtFrame; % in ms 45 | endLocation2 = (length(mObject2.F0)-1)*dtFrame; % in ms 46 | timeAnchor1 = [0;mObject1.anchorTimeLocation;endLocation1]; 47 | timeAnchor2 = [0;mObject2.anchorTimeLocation;endLocation2]; 48 | locationOn1 = (0:length(mObject1.F0)-1)*dtFrame; 49 | locationOn2 = (0:length(mObject2.F0)-1)*dtFrame; 50 | mapFrom1to2 = interp1(timeAnchor1,timeAnchor2,locationOn1); 51 | [nr1,nc1] = size(mObject1.spectrogram); 52 | [nr2,nc2] = size(mObject2.spectrogram); 53 | 54 | %---- initialize frequency mapping function 55 | fmapFrom1to2OnTime1 = generateFrequencyMap(mObject1,mObject2); 56 | 57 | %---- mixing on mObject1's time axis 58 | nAxis1 = length(locationOn1); 59 | nAxis2 = length(locationOn2); 60 | morphedF0 = zeros(nAxis1,1); 61 | morphedvuv = zeros(nAxis1,1); 62 | 
morphedAp = zeros(nr1,nAxis1); 63 | morphedSgram = zeros(nr1,nAxis1); 64 | weightSumF0 = zeros(nAxis1,1); 65 | for ii=1:nAxis1 66 | mappedIndexOn2 = mapFrom1to2(ii)/dtFrame+1; 67 | iFloor = floor(mappedIndexOn2); 68 | iFraction = mappedIndexOn2-iFloor; 69 | fIndex = floor(fmapFrom1to2OnTime1(:,ii)/fs*2*(nr1-1))+1; 70 | dAp = iFraction*(mObject2.aperiodicityIndex(:,min(iFloor+1,nAxis2))-mObject2.aperiodicityIndex(:,min(iFloor,nAxis2))); 71 | ap2on2faxis = mObject2.aperiodicityIndex(:,min(iFloor,nAxis2))+dAp; 72 | ap2on1faxis = ap2on2faxis(fIndex); 73 | morphedAp(:,ii) = (1-mixRate.aperiodicity)*mObject1.aperiodicityIndex(:,ii)+mixRate.aperiodicity*ap2on1faxis; %04/Oct/2005 HK 74 | switch mixMethod 75 | case 'linear' 76 | dSgram = iFraction*(mObject2.spectrogram(:,min(iFloor+1,nAxis2))-mObject2.spectrogram(:,min(iFloor,nAxis2))); 77 | sgram2on2faxis = mObject2.spectrogram(:,min(iFloor,nAxis2))+dSgram; 78 | sgram2on1faxis = sgram2on2faxis(fIndex); 79 | morphedSgram(:,ii) = (1-mixRate.spectrum)*mObject1.spectrogram(:,ii)+mixRate.spectrum*sgram2on1faxis; 80 | case 'log' 81 | dSgram = iFraction*(log(mObject2.spectrogram(:,min(iFloor+1,nAxis2)))-log(mObject2.spectrogram(:,min(iFloor,nAxis2)))); 82 | sgram2on2faxis = log(mObject2.spectrogram(:,min(iFloor,nAxis2)))+dSgram; 83 | sgram2on1faxis = sgram2on2faxis(fIndex); 84 | tmp = (1-mixRate.spectrum)*log(mObject1.spectrogram(:,ii))+mixRate.spectrum*sgram2on1faxis; 85 | morphedSgram(:,ii) = exp(tmp); 86 | case 'differentialLogarithm' 87 | dSgram = iFraction*(mObject2.spectrogram(:,min(iFloor+1,nAxis2))-mObject2.spectrogram(:,min(iFloor,nAxis2))); 88 | sgram2on2faxis = mObject2.spectrogram(:,min(iFloor,nAxis2))+dSgram; 89 | sgram2on1faxis = sgram2on2faxis(fIndex); 90 | tmp = (1-mixRate.spectrum)*log(mObject1.spectrogram(:,ii))+mixRate.spectrum*sgram2on1faxis; 91 | morphedSgram(:,ii) = exp(tmp); 92 | end; 93 | if mObject1.F0(ii)>0 94 | morphedF0(ii) = (1-mixRate.F0)*log(mObject1.F0(ii)); 95 | weightSumF0(ii) = (1-mixRate.F0); 
96 | end; 97 | if (mObject2.F0(iFloor)>0) & (mObject2.F0(min(iFloor+1,nAxis2))>0) 98 | dF0 = iFraction*(log(mObject2.F0(min(iFloor+1,nAxis2)))-log(mObject2.F0(min(iFloor,nAxis2)))); 99 | morphedF0(ii) = mixRate.F0*(log(mObject2.F0(min(iFloor,nAxis2)))+dF0)+morphedF0(ii); 100 | weightSumF0(ii) = weightSumF0(ii)+mixRate.F0; 101 | end; 102 | morphedvuv(ii) = ((mObject1.vuv(ii)*abs(1-mixRate.F0)+abs(mixRate.F0)*mObject2.vuv(min(iFloor,nAxis2)))>0); 103 | end; 104 | morphedF0(weightSumF0>0) = exp(morphedF0(weightSumF0>0)./weightSumF0(weightSumF0>0)); 105 | 106 | %----- mapping back onto morphed time axis 107 | timeAnchorMorph = (1-mixRate.timeCoordinate)*timeAnchor1 + mixRate.timeCoordinate*timeAnchor2; 108 | locationOnMorph = (0:(timeAnchorMorph(end)/dtFrame))*dtFrame; 109 | mapFormMorphTo1 = interp1(timeAnchorMorph,timeAnchor1,locationOnMorph); 110 | nAxisMorph = length(locationOnMorph); 111 | morphedApOnMorph = zeros(nr1,nAxisMorph); 112 | morphedSgramOnMorph = zeros(nr1,nAxisMorph); 113 | morphedF0onMorph = zeros(nAxisMorph,1); 114 | morphedVUVonMorph = zeros(nAxisMorph,1); 115 | %----- set place holders 116 | mObject3.samplingFrequency = fs; 117 | mObject3.F0 = morphedF0onMorph; 118 | mObject3.vuv = morphedVUVonMorph; 119 | mObject3.aperiodicityIndex = morphedApOnMorph; 120 | mObject3.spectrogram = morphedSgramOnMorph; 121 | mObject3.anchorTimeLocation = timeAnchorMorph(2:end-1); 122 | mObject3.anchorFrequency = (1-mixRate.freqCoordinate)*mObject1.anchorFrequency+mixRate.freqCoordinate*mObject2.anchorFrequency; 123 | %------ nitialize frequency mapping function 124 | fmapFromMorphto1OnTimeMorph = generateFrequencyMap(mObject3,mObject1); 125 | for ii=1:nAxisMorph 126 | mappedIndexOn1 = mapFormMorphTo1(ii)/dtFrame+1; 127 | iFloor = floor(mappedIndexOn1); 128 | iFraction = mappedIndexOn1-iFloor; 129 | fIndex = floor(fmapFromMorphto1OnTimeMorph(:,ii)/fs*2*(nr1-1))+1; 130 | morphedApOnMorph(:,ii) = morphedAp(fIndex,iFloor) ... 
131 | +iFraction*(morphedAp(fIndex,min(iFloor+1,nAxis1))-morphedAp(fIndex,iFloor)); 132 | morphedSgramOnMorph(:,ii) = morphedSgram(fIndex,iFloor) ... 133 | +iFraction*(morphedSgram(fIndex,min(iFloor+1,nAxis1))-morphedSgram(fIndex,iFloor)); 134 | if (morphedF0(iFloor)>0) & (morphedF0(min(iFloor+1,nAxis1))>0) 135 | dF0 = iFraction*(morphedF0(min(iFloor+1,nAxis1))-morphedF0(iFloor)); 136 | morphedF0onMorph(ii) = morphedF0(iFloor)+dF0; 137 | end; 138 | morphedVUVonMorph(ii) = morphedvuv(iFloor); 139 | end; 140 | mObject3.F0 = morphedF0onMorph; 141 | mObject3.vuv = morphedVUVonMorph; 142 | mObject3.aperiodicityIndex = morphedApOnMorph; 143 | mObject3.spectrogram = morphedSgramOnMorph; 144 | mObject3.anchorTimeLocation = timeAnchorMorph(2:end-1); 145 | %mObject3.anchorFrequency = (1-mRate)*mObject1.anchorFrequency+mRate*mObject2.anchorFrequency; 146 | %mObject3 = fmapFromMorphto1OnTimeMorph; % This line is a dummy. 147 | return; 148 | 149 | %%% ------ Internal function to check for object's similarity 150 | function mObject3 = checkForSimilarity(mObject1,mObject2) 151 | mObject3 = []; 152 | if mObject1.samplingFrequency ~= mObject2.samplingFrequency;mObject3 = [];return;end; 153 | if mObject1.frameUpdateInterval ~= mObject2.frameUpdateInterval;mObject3 = [];return;end; 154 | if length(mObject1.anchorTimeLocation) ~= length(mObject2.anchorTimeLocation);mObject3 = [];return;end; 155 | nAnchor = length(mObject1.anchorTimeLocation); 156 | for ii=1:nAnchor % check for similarity of anchor structure 157 | frequencyAnchor1 = mObject1.anchorFrequency(ii,:)';% 01/Oct./2005 by HK 158 | frequencyAnchor2 = mObject2.anchorFrequency(ii,:)';% 01/Oct./2005 by HK 159 | if (sum(frequencyAnchor1>0) ~= sum(frequencyAnchor2>0)) | ... 160 | (sum(frequencyAnchor1<0) ~= sum(frequencyAnchor2<0)) 161 | display('Warning!! 
Object structures are inconsistent!'); % 01/Oct./2005 by HK 162 | return; 163 | end; 164 | end; 165 | mObject3 = createMobject; 166 | m0bject3.samplingFrequency = mObject1.samplingFrequency; 167 | m0bject3.frameUpdateInterval = mObject1.frameUpdateInterval; 168 | return; 169 | 170 | %%%-------- 171 | function mixRate = checkForMorphingConditions(mRate); 172 | % 04/Oct./2005 added by HK 173 | 174 | if ~isstruct(mRate) 175 | mixRate.F0 = mRate; 176 | mixRate.spectrum = mRate; 177 | mixRate.aperiodicity = mRate; 178 | mixRate.timeCoordinate = mRate; 179 | mixRate.freqCoordinate = mRate; 180 | return; 181 | end; 182 | mixRate.F0 = mRate.F0; 183 | mixRate.spectrum = mRate.spectrum; 184 | mixRate.aperiodicity = mRate.aperiodicity; 185 | mixRate.timeCoordinate = mRate.timeCoordinate; 186 | mixRate.freqCoordinate = mRate.freqCoordinate; 187 | return; 188 | 189 | %%%-------- 190 | function fmapFrom1to2OnTime1 = generateFrequencyMap(mObject1,mObject2); 191 | 192 | dtFrame = mObject1.frameUpdateInterval; 193 | endLocation1 = (length(mObject1.F0)-1)*dtFrame; % in ms 194 | timeAnchor1 = [0;mObject1.anchorTimeLocation;endLocation1]; 195 | locationOn1 = (0:length(mObject1.F0)-1)*dtFrame; 196 | fs = mObject1.samplingFrequency; 197 | [nr1,nc1] = size(mObject1.spectrogram); 198 | nAnchor = length(mObject1.anchorTimeLocation); 199 | fmapFrom1to2 = zeros(nr1,nAnchor); 200 | frequencyAxis = (0:nr1-1)'/(nr1-1)*fs/2; 201 | numberOfFrequencyAnchors = zeros(nAnchor,1); 202 | for ii=1:nAnchor 203 | frequencyAnchor1 = mObject1.anchorFrequency(ii,:)'; 204 | frequencyAnchor1 = [0;frequencyAnchor1(frequencyAnchor1>0);fs/2]; 205 | numberOfFrequencyAnchors(ii) = length(frequencyAnchor1(frequencyAnchor1>0)); 206 | frequencyAnchor2 = mObject2.anchorFrequency(ii,:)'; 207 | frequencyAnchor2 = [0;frequencyAnchor2(frequencyAnchor2>0);fs/2]; 208 | fmapFrom1to2(:,ii) = interp1(frequencyAnchor1,frequencyAnchor2,frequencyAxis); 209 | end; 210 | for ii=1:nAnchor 211 | if numberOfFrequencyAnchors(ii) == 1 
212 | if numberOfFrequencyAnchors(min(ii+1,nAnchor)) > 1 213 | fmapFrom1to2(:,ii) = fmapFrom1to2(:,min(ii+1,nAnchor)); 214 | elseif numberOfFrequencyAnchors(max(ii-1,1)) > 1 215 | fmapFrom1to2(:,ii) = fmapFrom1to2(:,max(ii-1,1)); 216 | end; 217 | end; 218 | end; 219 | fmapFrom1to2 = [fmapFrom1to2(:,1) fmapFrom1to2 fmapFrom1to2(:,nAnchor)]; 220 | fmapFrom1to2OnTime1 = interp1(timeAnchor1,fmapFrom1to2',locationOn1)'; 221 | return; 222 | 223 | %%%------- 224 | function cleanedUpObject = checkForIntegrity(inputObject); 225 | 226 | maximumIndex = max([length(inputObject.F0), ... 227 | size(inputObject.spectrogram,2) ... 228 | size(inputObject.aperiodicityIndex,2)]); 229 | if length(inputObject.F0) < maximumIndex 230 | inputObject.F0 = [inputObject.F0(:);inputObject.F0(end)*ones(maximumIndex - length(inputObject.F0),1)]; 231 | end; 232 | if size(inputObject.spectrogram,2) < maximumIndex 233 | numberOfFillIn = maximumIndex-size(inputObject.spectrogram,2); 234 | inputObject.spectrogram = [inputObject.spectrogram inputObject.spectrogram(:,end)*ones(1,numberOfFillIn)]; 235 | end; 236 | if size(inputObject.aperiodicityIndex,2) < maximumIndex 237 | numberOfFillIn = maximumIndex-size(inputObject.aperiodicityIndex,2); 238 | inputObject.aperiodicityIndex = [inputObject.aperiodicityIndex inputObject.aperiodicityIndex(:,end)*ones(1,numberOfFillIn)]; 239 | end; 240 | cleanedUpObject = inputObject; 241 | 242 | -------------------------------------------------------------------------------- /morphing_src/tmang.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/morphing_src/tmang.wav -------------------------------------------------------------------------------- /morphing_src/tmneu.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/morphing_src/tmneu.wav -------------------------------------------------------------------------------- /morphing_src/updateFieldOfMobject.m: -------------------------------------------------------------------------------- 1 | function mObject = updateFieldOfMobject(mObject,fieldName,fieldValue) 2 | 3 | if isfield(mObject,fieldName) 4 | mObject = setfield(mObject,fieldName,fieldValue); 5 | else 6 | disp([fieldName ' is not in a Mobject.']); 7 | end; 8 | 9 | -------------------------------------------------------------------------------- /morphing_src/waveformMorphing.m: -------------------------------------------------------------------------------- 1 | function mObject3 = waveformMorphing(mObject1,mObject2,mRate); 2 | % Morphing with minimum information 3 | % (Actually this is not real morphing. 4 | % It is simply blending two waveform.) 5 | % mObject3 = waveformMorphing(mObject1,mObject2,mRate); 6 | 7 | % Designed and coded by Hideki Kawahara 8 | % 27/Feb./2005 9 | % Copyright(c) 2005, Hideki Kawahara 10 | 11 | nLength = max(length(mObject1.waveform),length(mObject2.waveform)); 12 | if mObject1.samplingFrequency ~= mObject2.samplingFrequency 13 | mObject3 = []; 14 | return 15 | end; 16 | x = zeros(nLength,1); 17 | x(1:length(mObject1.waveform)) = (1-mRate)*mObject1.waveform; 18 | x(1:length(mObject2.waveform)) = mRate*mObject2.waveform + x(1:length(mObject2.waveform)); 19 | 20 | mObject3=createMobject; 21 | mObject3.waveform = x; 22 | mObject3.samplingFrequency = mObject1.samplingFrequency; 23 | -------------------------------------------------------------------------------- /src/CheckAnalysisData.m: -------------------------------------------------------------------------------- 1 | function output = ... 2 | CheckAnalysisData(f0raw, ap, n3sgram, target_analysis_dir, tmp_name_root) 3 | output = true; 4 | tolerance = 10 ^ (-6); 5 | f0_file_path = ... 
6 | [target_analysis_dir tmp_name_root 'f0.bin']; 7 | ap_file_path = ... 8 | [target_analysis_dir tmp_name_root 'ap.bin']; 9 | sp_file_path = ... 10 | [target_analysis_dir tmp_name_root 'sp.bin']; 11 | f0_ref = ReadBinaryData(f0_file_path); 12 | ap_ref = ReadBinaryData(ap_file_path); 13 | sp_ref = ReadBinaryData(sp_file_path); 14 | f0_median = median(f0_ref(f0_ref > 30 & f0_ref < 1000)); 15 | ap_std = std(ap_ref(:)); 16 | sp_std = std(sp_ref(:)); 17 | if std(f0raw(:) - f0_ref(:)) / f0_median > tolerance 18 | return; 19 | end; 20 | if std(ap(:) - ap_ref(:)) / ap_std > tolerance 21 | return; 22 | end; 23 | if std(n3sgram(:) - sp_ref(:)) / sp_std > tolerance 24 | return; 25 | end; 26 | end -------------------------------------------------------------------------------- /src/HzToErbRate.m: -------------------------------------------------------------------------------- 1 | function y=HzToErbRate(x) 2 | % by Matrin Cooke, adopted from MAD library 3 | 4 | y=(21.4*log10(4.37e-3*x+1)); 5 | -------------------------------------------------------------------------------- /src/ReadBinaryData.m: -------------------------------------------------------------------------------- 1 | function data = ReadBinaryData(path_name) 2 | data = []; 3 | fid = fopen(path_name); 4 | magic = int8('magic'); 5 | read_magic = fread(fid, 5, 'int8'); 6 | for ii = 1:5 7 | if magic(ii) ~= read_magic(ii) 8 | return; 9 | end; 10 | end; 11 | n_row = fread(fid, 1, 'int32'); 12 | n_column = fread(fid, 1, 'int32'); 13 | data = zeros(n_row, n_column); 14 | for ii = 1:n_row 15 | data(ii, :) = double(fread(fid, n_column, 'float32')); 16 | end; 17 | fclose(fid); 18 | end -------------------------------------------------------------------------------- /src/SynthesizeLegacy_STRAIGHT_default.m: -------------------------------------------------------------------------------- 1 | function syntheszed_signal = SynthesizeLegacy_STRAIGHT_default(x, fs) 2 | % Conditions are based on the web document 3 | % 4 | 5 | f0raw = 
function output = TestAnalysisRegression(n_test)
% TestAnalysisRegression  Regression test of the STRAIGHT analysis stage.
%   output = TestAnalysisRegression(n_test)
%
% Picks n_test files at random (with replacement) from the reference wave
% directory, re-analyses the matching original recording with a fixed
% random seed, and compares F0, aperiodicity and spectrogram with the
% stored reference data through CheckAnalysisData.
%
% Input
%   n_test : number of randomly selected files to check
% Output
%   output : true when every selected file passes, false otherwise
%            (the test stops at the first failure)
%
% NOTE(review): the directories below are absolute paths on the original
% author's machine and have to be adapted before running elsewhere.

if ~isOctave
  rng('shuffle'); % randomize which files are selected on each run
end;
output = false;
original_speech_dir = '/Users/kawahara/Music/VCTK_CORPUS/VCTK-Corpus/wav48/';
target_analysis_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/analysisData/';
target_wave_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/waveData/';
target_files = dir([target_wave_dir '*.wav']);
n_files = length(target_files);
if n_files == 0
  disp(['Failed: no reference wave file was found in ' target_wave_dir]);
  return;
end;
% A column of n_test indices (randi(n, k) would build a k x k matrix).
selected_id = randi(n_files, n_test, 1);
% Octave has no rng(); the legacy seeding calls are kept as strings and
% evaluated only under Octave. These definitions were missing, which made
% the Octave branch below fail with an undefined-variable error.
command1 = 'rand("seed", 12345);';
command2 = 'randn("seed", 12345);';
for ii = 1:n_test
  tmp_name = target_files(selected_id(ii)).name;
  % The first four characters of the file name select the sub-directory
  % of the original recording.
  [x, fs] = audioread([original_speech_dir tmp_name(1:4) '/' tmp_name]);
  disp([num2str(ii) ': ' tmp_name ' ' datestr(now)]);
  if isOctave
    eval(command1);
    eval(command2);
  else
    rng(12345); % initialize frozen random number
  end;
  f0raw = MulticueF0v14(x, fs);
  ap = exstraightAPind(x, fs, f0raw);
  n3sgram = exstraightspec(x, f0raw, fs);
  tmp_name_root = tmp_name(1:end - 4); % strip the '.wav' extension
  if ~CheckAnalysisData(f0raw, ap, n3sgram, target_analysis_dir, tmp_name_root)
    disp(['Failed: ' tmp_name ' data is not similar.']);
    return;
  end;
end;
disp(['Success! ' num2str(n_test) ' files are passed analysis regression.']);
output = true;
end
function output = TestCopySynthRegression(n_test)
% TestCopySynthRegression  Regression test of analysis plus copy synthesis.
%   output = TestCopySynthRegression(n_test)
%
% For n_test randomly selected reference files the original recording is
% re-analysed with a fixed random seed, the analysis data are compared
% with the stored reference (a mismatch is reported but does not stop the
% test), and the re-synthesized waveform is compared with the stored
% reference waveform.
%
% Input
%   n_test : number of randomly selected files to check
% Output
%   output : true when every selected file passes the copy-synthesis
%            comparison, false otherwise
%
% NOTE(review): the directories below are relative to the original
% author's home directory and have to be adapted before running elsewhere.

output = false;
original_speech_dir = '~/Music/VCTK_CORPUS/VCTK-Corpus/wav48/';
target_analysis_dir = '~/m-file/STRAIGHTV40_007e/analysisData/';
target_wave_dir = '~/m-file/STRAIGHTV40_007e/waveData/';
target_files = dir([target_wave_dir '*.wav']);
n_files = length(target_files);
if n_files == 0
  disp(['Failed! no reference wave file was found in ' target_wave_dir]);
  return;
end;
% A column of n_test indices (randi(n, k) would build a k x k matrix).
selected_id = randi(n_files, n_test, 1);
% rng() does not exist in Octave; use the legacy seeding calls there, as
% the sibling regression tests do. They are kept as strings and evaluated
% only under Octave.
command1 = 'rand("seed", 12345);';
command2 = 'randn("seed", 12345);';
for ii = 1:n_test
  tmp_name = target_files(selected_id(ii)).name;
  [x, fs] = audioread([original_speech_dir tmp_name(1:4) '/' tmp_name]);
  disp([num2str(ii) ': ' tmp_name ' ' datestr(now)]);
  if isOctave
    eval(command1);
    eval(command2);
  else
    rng(12345); % initialize frozen random number
  end;
  f0raw = MulticueF0v14(x, fs);
  ap = exstraightAPind(x, fs, f0raw);
  n3sgram = exstraightspec(x, f0raw, fs);
  tmp_name_root = tmp_name(1:end - 4); % strip the '.wav' extension
  if ~CheckAnalysisData(f0raw, ap, n3sgram, target_analysis_dir, tmp_name_root)
    disp(['Failed: ' tmp_name ' data is not similar.']);
  end;
  wave_pathname = [target_wave_dir tmp_name];
  % fs is overwritten by the sampling frequency of the reference waveform,
  % which is the value passed to the synthesizer below.
  [sy, fs] = audioread(wave_pathname);
  y = exstraightsynth(f0raw,n3sgram,ap,fs);
  % The synthesized signal is peak-normalized to 0.9 before comparison;
  % the relative SD of the difference must stay within 10^-3.
  if std(sy - y / max(abs(y)) * 0.9) / std(sy) > 10 ^ (-3)
    disp(['Failed! ' tmp_name ' copy synthesis test.']);
    keyboard % stop in the debugger so the failing data can be inspected
    return;
  end;
end;
disp(['Success! ' num2str(n_test) ' files are passed copy-synth regression.']);
output = true;
end
function [x,fs]=aiffread(fname)
%	function [x,fs]=aiffread(fname)
%	Read AIFF and AIFF-C file
%	This is a reduced version and does not fulfill the
%	AIFF-C standard.
%
%	Input
%	  fname : path of the file to read
%	Output
%	  x  : sample data, one column per channel, as the integer values
%	       stored in the file (no scaling is applied). Empty when the
%	       file could not be decoded.
%	  fs : sampling frequency (Hz) decoded from the COMM chunk
%	       (44100 when no COMM chunk was parsed)
%
%	Only uncompressed 8, 16 and 24 bit data are supported. An error is
%	raised when the file cannot be opened.

%	Coded by Hideki Kawahara based on "Audio Interchange file format AIFF-C draft"
%	by Apple Computer inc. 8/26/91
%	14/Feb./1998
%	17/Feb./1998
%	14/Jan./1999 bug fix for Windows

x=[];fs=44100;
[fid,message]=fopen(fname,'r','ieee-be.l64');
if fid<0
    error('aiffread:cannotOpen','Cannot open %s: %s',fname,message);
end;
id.form=fread(fid,4,'char');
id.formcksz=fread(fid,1,'int32');
id.formtp=fread(fid,4,'char');
if ~strcmp(char(id.form),['F';'O';'R';'M'])
    disp(char(id.form));
    disp('This is not a proper AIFF file.');
    fclose(fid); % every exit path has to release the handle
    return;
end;
isAIFC=strcmp(char(id.formtp),['A';'I';'F';'C']);
if ~strcmp(char(id.formtp),['A';'I';'F';'F']) && ~isAIFC
    disp(char(id.formtp));
    disp('This is not a proper AIFF file.');
    fclose(fid);
    return;
end;

[id.comm,na]=fread(fid,4,'uchar');
while na>3
    switch(strcat(char(id.comm)'))
        case 'FVER'
            id.fsize=fread(fid,1,'int32');
            id.timesta=fread(fid,1,'uint32');
            if id.timesta ~= 2726318400
                disp(['I cannot recognize timestump ' num2str(id.timesta)]);
            end;
        case 'COMM'
            id.commsz=fread(fid,1,'int32');      % chunk size in bytes
            id.commnch=fread(fid,1,'int16');     % number of channels
            id.commdsz=fread(fid,1,'uint32');    % number of sample frames
            id.samplesize=fread(fid,1,'int16');  % bits per sample
            id.srex1=fread(fid,1,'uint16');      % 80 bit float: sign and exponent
            id.srex2=fread(fid,1,'uint64');      % 80 bit float: mantissa
            if isAIFC
                id.compress=fread(fid,4,'char');
                if ~strcmp(char(id.compress),['N';'O';'N';'E'])
                    disp('Compression is not supported.');
                    fclose(fid);
                    return;
                end;
                % skip the compression name that follows the 22 fixed bytes
                fread(fid,id.commsz-22,'char');
            end;
            % 2^63 is the value of the explicit integer bit of the mantissa
            fs=2^(id.srex1-16383)*id.srex2/2^63;
        case 'SSND'
            id.ckdatasize=fread(fid,1,'uint32');
            id.offset=fread(fid,1,'int32');
            id.blksz=fread(fid,1,'int32');
            % ckdatasize includes the 8 bytes of offset and block size
            switch(id.samplesize)
                case 8
                    x=fread(fid,id.ckdatasize-8,'int8');
                case 16
                    x=fread(fid,(id.ckdatasize-8)/2,'int16');
                case 24
                    x=fread(fid,(id.ckdatasize-8)/3,'bit24');
                otherwise
                    disp(['Sample size of ' num2str(id.samplesize) ' bits is not supported.']);
                    fclose(fid);
                    return;
            end;
            % Samples are interleaved by channel. The second dimension is
            % the number of sample frames (commdsz); the 8 bit case used
            % the COMM chunk byte size (commsz) here by mistake.
            x=reshape(x,id.commnch,id.commdsz)';
            if rem(id.ckdatasize,2)==1
                fread(fid,1,'uchar'); % chunks are padded to an even size
            end;
        otherwise
            id.fsize=fread(fid,1,'int32');
            if feof(fid) || id.fsize > id.formcksz || id.fsize <=0
                break; % implausible chunk: stop parsing, keep what was read
            end;
            % skip the unknown chunk, including its pad byte when the size is odd
            id.skip=fread(fid,id.fsize+rem(id.fsize,2),'char');
    end;
    [id.comm,na]=fread(fid,4,'uchar');
    if na==0 && isempty(x)
        disp('End of file reached!');
    end;
end;
fclose(fid);
function ap=aperiodiccomp(apv,dpv,ashift,f0,nshift,imgi)
%   ap=aperiodiccomp(apv,dpv,ashift,f0,nshift,imgi);
%   Calculate aperiodicity index: the difference between the lower and the
%   upper envelope (dpv-apv), linearly interpolated from the envelope
%   frame positions to the F0 frame positions.
%   Input parameters
%   apv, dpv : Upper and lower envelope (one column per envelope frame)
%   ashift : shift step for aperiodicity index calculation (ms)
%   f0 : fundamental frequency (Hz); only its length is used
%   nshift : shift step for f0 information (ms)
%   imgi : display indicator, 1: display on (default) 0: off
%   Output parameter
%   ap : interpolated difference, one column per F0 frame

%   modified to add the waitbar on 08/Dec./2002
%   modified by Takahashi 10/Aug./2005
%   modified by Kawahara 10/Sept./2005

if nargin==5
    imgi=1;
end;
showProgress=(imgi==1);

nEnvelopeFrames=size(apv,2);
nF0Frames=length(f0);

% frame positions of the envelopes and of the F0 track; F0 positions
% beyond the last envelope frame are clipped to that frame
envelopeTime=(0:nEnvelopeFrames-1)'*ashift;
f0Time=(0:nF0Frames-1)'*nshift;
f0Time=min(max(envelopeTime),f0Time);

if showProgress
    hpg=waitbar(0.1,'Interpolating periodicity information');
    drawnow;
end;

% interp1 works along columns, so frames are moved to rows and back
levelDifference=(dpv-apv)';
ap=interp1(envelopeTime,levelDifference,f0Time,'linear','extrap')';

if showProgress
    close(hpg);
end;
62 | ww=wcc(round((1:fftl)-fftl/2+mxp))'; 63 | bb=(1:fftl)-fftl/2; 64 | 65 | %----- spectrum smoother design 66 | fff=[2:fftl 1]; 67 | ffb=[fftl 1:fftl-1]; 68 | %----- lifter design 69 | qx=(0:fftl-1)/fs; 70 | lft=1.0./(1+exp((qx-1.4/40)*1000))'; 71 | lft(fftl:-1:fftl/2)=lft(2:fftl/2+2); 72 | %------ preparation for EREB smoothing 73 | 74 | evv=(0:1024)/1024*HzToErbRate(fs/2); % ERB axis for smoothing 75 | eew=1; % effective smoothing width in ERB 76 | lh=round(2*eew/evv(2)); % number of samples for 2*eew on evv axis 77 | we=hanning(lh)/sum(hanning(lh)); % Hanning window is used for smoothing 78 | bx=(1:length(evv)); % index for extraction 79 | hvv=228.8*(10.0.^(0.0467*evv)-1); % frequency axis represented in Hz 80 | hvv(1)=0; hvv(end)=fs/2; % safeguard 81 | 82 | evx=(0:0.5:max(evv)); 83 | 84 | bss=(1:fftl/2-1); 85 | bss2=1:fftl/2; 86 | 87 | apv=zeros(mm,length(tidx)); 88 | dpv=zeros(mm,length(tidx)); 89 | apve=zeros(length(evx),length(tidx)); 90 | dpve=apve; 91 | 92 | for ii=1:length(tidx); 93 | idp=round(tidx(ii))+bias; 94 | sw=abs(fft(xii(idp+bb).*ww)); 95 | sws=(sw*2+sw(ffb)+sw(fff))/4; 96 | sms=real(ifft(real(fft(log(sws))).*lft))/log(10)*20; %smoothed dB spectrum 97 | plits=[0; (((diff(sms(bss2)).*diff(sms(bss2+1)))<0).*sms(bss).*(diff(sms(bss2))>0))]; 98 | dlits=[0; (((diff(sms(bss2)).*diff(sms(bss2+1)))<0).*sms(bss).*(diff(sms(bss2))<0))]; 99 | gg=fxfi(abs(plits)>0); 100 | gfg=(sms(abs(plits)>0)); 101 | dd=fxfi(abs(dlits)>0); 102 | dfd=(sms(abs(dlits)>0)); 103 | gga=[0;gg;fs/2]*f0ii(round(tidx(ii)))/40; 104 | dda=[0;dd;fs/2]*f0ii(round(tidx(ii)))/40; 105 | dfda=[dfd(1) ;dfd ;dfd(end)]; % dip level (dB) 106 | gfga=[gfg(1); gfg ;gfg(end)]; % peak level (dB) 107 | dfdap=10.0.^(dfda/10); % dip level (power) 108 | gfgap=10.0.^(gfga/10); % peak level (power) 109 | ape=interp1(HzToErbRate(gga),gfgap,evv); % Upper power envelope on ERB 110 | dpe=interp1(HzToErbRate(dda),dfdap,evv); % Lower power envelope on ERB 111 | 112 | apef=[ape(lh:-1:2) ape 
function bv=boundmes2(apv,dpv,fs,shiftm,intshiftm,mm)
%   boundary calculation for MBE model
%   bv=boundmes2(apv,dpv,fs,shiftm,intshiftm,mm);
%   apv : peak envelope
%   dpv : dip envelope
%   fs  : sampling frequency (Hz)
%   shiftm  : frame shift of F0 data
%   intshiftm   : frame shift for envelope data
%   mm  : number of elements in frequency axis
%
%   bv  : one value per frame (row vector): the weighted average over the
%         frequency bins 2..mm of (dpv-apv)/20-log10(frequency), with the
%         weight 10^(apv/20)/frequency. The result is resampled from the
%         envelope frame shift to the F0 frame shift. [] is returned when
%         either frame shift is not an integer.
%
%   The upsampling step uses interp (Signal Processing Toolbox).

%   01/Sept./1999
%   by Hideki Kawahara

bv=[];
% Assuming shiftm >= 1 ms and integer frame shifts
if ne(round(shiftm),shiftm) || ne(round(intshiftm),intshiftm)
    return;
end;

fx=(1:mm-1)'/(mm-1)/2*fs;   % frequency of bins 2..mm (Hz), DC excluded
lx=log10(fx);
weight=bsxfun(@rdivide,10.0.^(apv(2:mm,:)/20),fx);
logRatio=(dpv(2:mm,:)-apv(2:mm,:))/20;
bv=sum(bsxfun(@minus,logRatio,lx).*weight,1)./sum(weight,1);

if shiftm==intshiftm
    return;
end;
% Go through a grid with a shift of 1 and pick every shiftm-th point.
% The decimation used to be nested inside the upsampling branch, so it
% was skipped when the envelope frame shift was already 1.
if intshiftm>1
    bv=interp(bv,intshiftm);
end;
if shiftm>1
    bv=bv(1:shiftm:length(bv));
end;
12 | tpath mag delfrac hr upsampleon defaultch 13 | 14 | % paraminitialized 15 | 16 | f0floor=40; % Lower limit of F0 search range 17 | f0ceil=800; % Upper limit of F0 search range 18 | fs=22050; % sampling frequency (Hz) 19 | framem=40; % default frame length limit for pitch extraction (ms) 20 | shiftm=1; % default frame shift (ms) for spectrogram 21 | f0shiftm=1; % default frame shift (ms) for F0 information 22 | fftl=1024; % default FFT length 23 | eta=1.4; % time window stretch factor 24 | pc=0.6; % exponent for nonlinearity 25 | mag=0.2; % This parameter should be revised. 26 | framel=framem*fs/1000; 27 | 28 | if fftl < framel 29 | fftl=2^ceil(log(framel)/log(2)); 30 | end; 31 | fftl2=fftl/2; 32 | defaultch=1; % 17/Feb./2001 33 | 34 | %-------------- Decision parameter for source information 35 | 36 | acth=0.5; % Threshold for normalized correlation (dimension less) 37 | pwth=32; % Threshold for instantaneous power below maximum (dB) 38 | 39 | %----------------------------------------------------- 40 | % Synthesis parameters 41 | %----------------------------------------------------- 42 | 43 | pcnv=1.0; % pitch stretch 44 | fconv=1.0; % frequency stretch 45 | sconv=1.0; % time stretch 46 | 47 | % delsp=2; % standard deviation of random group delay in ms 48 | delsp=0.5; % standard deviation of random group delay in ms 26/June/2002 49 | gdbw=70; % smoothing window length of random group delay (in Hz) 50 | % cornf=3000; % corner frequency for random phase (Hz) 51 | cornf=4000; % corner frequency for random phase (Hz) 26/June 2002 52 | delfrac=0.2; % This parameter should be revised. 
function [sy,prmS] = exSinStraightSynth(f0raw,n3sgram,fs,optionalParamsS)
%   STRAIGHT synthesis based on sinusoidal plus noise model
%   [sy,prmS] = exSinStraightSynth(f0raw,n3sgram,fs,optionalParamsS)
%   Input
%   f0raw   : fundamental frequency (Hz), one value per frame
%   n3sgram : STRAIGHT spectrogram (frequency bins x frames)
%   fs      : sampling frequency
%   optionalParamsS : optional parameters
%       spectralUpdateInterval : frame rate (ms)
%       initialPhase : initial phase of sinusoids (radian)
%       initialAmplitude : initial amplitude for defining waveform
%       lowestF0 : lowest F0 of the synthesized speech (Hz)
%       minimumPhase : minimum phase indicator (defult 0)
%   Output
%   sy      : synthesized speech waveform (column vector)
%   prmS    : parameters used in synthesis
%
%   Called with fewer than three or more than four arguments, the help
%   text is shown and sy=[] is returned with the default parameters.

%   Originally coded when visiting CNBH on 2003
%   Revised by Hideki Kawahara
%   11/December/2005 by Hideki Kawahara

sy = [];
switch nargin
    case 3
        prmS = zinitializeParameters(fs);
    case 4
        prmS = replaceSuppliedParameters(fs,optionalParamsS);
    otherwise
        help exSinStraightSynth
        fs = 44100;
        prmS = zinitializeParameters(fs);
        return;
end;
shiftm = prmS.spectralUpdateInterval;
initialPhase = reshape(prmS.initialPhase,1,[]);         % accept rows and columns
initialAmplitude = reshape(prmS.initialAmplitude,1,[]);
minimumPhase = prmS.minimumPhase;

cdm = unwrap(zspectrum2minimumphase(n3sgram,fs));
[amx,fmx,cmx]= sinucompgd(f0raw,fs,n3sgram,cdm,shiftm);
% harmonics outside the frequency range of the spectrogram interpolate to NaN
amx(isnan(amx))=0;
cmx(isnan(cmx))=0;
deltaPhase = 2*pi*fmx/fs;               % phase advance per sample
phaseDeviation = cmx*minimumPhase;
nFrequency = size(deltaPhase,2);
% When there are more harmonics than supplied values the last value is
% reused. Each vector is clamped with its own length; the amplitude index
% used to be clamped with the length of the phase vector.
phaseIndex = min(length(initialPhase),1:nFrequency);
amplitudeIndex = min(length(initialAmplitude),1:nFrequency);
deltaPhase(1,:) = initialPhase(phaseIndex)+deltaPhase(1,:);
amx = amx*diag(initialAmplitude(amplitudeIndex));
sy=sum(real(amx.*exp(1i*(cumsum(deltaPhase)+phaseDeviation))), 2);
return;

%%%---- internal functions
function prmS = zinitializeParameters(fs)
% default synthesis parameters for the sampling frequency fs
prmS.spectralUpdateInterval = 1; %shiftm=1; % default frame shift (ms) for spectrogram
prmS.lowestF0 = 50; % compatible default is 50 Hz
prmS.initialPhase = zeros(1,ceil(fs/prmS.lowestF0/2));
prmS.initialAmplitude = ones(1,ceil(fs/prmS.lowestF0/2));
prmS.minimumPhase = 0; % default is zero phase
prmS.samplingFrequency = fs;
return;

%%%----
function prmS = replaceSuppliedParameters(fs,prmin)
% defaults overridden by the recognized fields found in prmin
prmS = zinitializeParameters(fs);
fieldNames = {'spectralUpdateInterval','lowestF0','initialPhase', ...
    'initialAmplitude','minimumPhase'};
for ii = 1:length(fieldNames)
    if isfield(prmin,fieldNames{ii})
        prmS.(fieldNames{ii}) = prmin.(fieldNames{ii});
    end;
end;
return;

%%%----
function [amx,fmx,cmx]= sinucompgd(f0raw,fs,n3sgram,cdm,shiftm)

%   [amx,fmx,cmx]=sinucompgd(f0raw,fs,n3sgram,cdm,shiftm)
%   program to generate matrix for sinusoidal synthesis
%   amx : amplitude of each harmonic (samples x harmonics)
%   fmx : frequency of each harmonic (Hz)
%   cmx : value of cdm interpolated at each harmonic

%   Designed and Coded by Hideki Kawahara
%   07/Sept./2003

% Frame k is located at k*shiftm ms. The time axis used to be divided by
% shiftm, which is only correct for shiftm = 1.
frameTime=(0:length(f0raw)-1)*shiftm/1000;
t=0:1/fs:frameTime(end);
f0i=interp1(frameTime,f0raw,t)';
f0l=min(f0raw(f0raw>0));
if isempty(f0l)
    f0l=fs/2; % no voiced frame: only the (muted) DC column remains
end;
ng=n3sgram';
ng(:,1) = ng(:,1)*0; % mute the DC bin, which also serves frames with F0 = 0
[~,mm]=size(ng);

% ---- instantaneous frequency matrix ---
nh=ceil(fs/2/f0l);
fmx=f0i*(0:nh-1);       % column ii+1 holds ii*f0
tmx=t'*ones(1,nh);

% ---- instantaneous amplitude matrix ---
[ff,tt]=meshgrid((0:(mm-1))*fs/((mm-1)*2),frameTime);
amx=interp2(ff,tt,ng,fmx,tmx,'linear');
% The phase map is sampled on the same (frequency, time) grid as the
% amplitude. It used to be called with the frequency grid for both axes,
% which is not a valid grid for the time direction.
cmx=interp2(ff,tt,cdm',fmx,tmx,'linear');
return;

%%%---
function cph=zspectrum2minimumphase(n3sgram,~)
%   cph=zspectrum2minimumphase(n3sgram,fs)
%   function to calculate minimum phase map from
%   smoothed time frequency representation

%   Designed and coded by Hideki Kawahara
%   7/Sept./2003
%   11/Dec./2005 revised

[nRow,nColumn]=size(n3sgram);
fftl=(nRow-1)*2;

reversedIndex=fftl/2:-1:2;
cph=zeros(nRow,nColumn);
for ii=1:nColumn
    % full-length symmetric spectrum of one frame
    dftSegment=[n3sgram(:,ii);n3sgram(reversedIndex,ii)];
    complexCepstrum=real(fft(log(dftSegment)));
    % keep the causal half of the cepstrum
    causalCepstrum=[complexCepstrum(1);2*complexCepstrum(2:fftl/2);0*complexCepstrum(fftl/2+1:fftl)];
    causalLogSpectrum=ifft(causalCepstrum);
    cph(:,ii)=-imag(causalLogSpectrum(1:fftl/2+1));
end;
% NOTE(review): the statements down to the gmx assignment are the tail of a
% function whose header lies before this point; they are kept unchanged.
t=0:1/fs:(length(f0raw)-1)/1000/shiftm;
f0i=interp1((0:length(f0raw)-1)/1000/shiftm,f0raw,t)';
f0l=min(f0raw(f0raw>0));
ng=n3sgram';
ng(:,1) = ng(:,1)*0;
gd=gdm';
[nn,mm]=size(ng);

% ---- instantaneous frequency matrix ---
nh=ceil(fs/2/f0l);
nt=length(f0i);
fmx=zeros(nt,nh);
tmx=fmx;
for ii=1:nh
    fmx(:,ii)=ii*f0i;
    tmx(:,ii)=t';
end;

% ---- instantaneous amplitude matrix ---
amx=zeros(nt,nh);
[ff,tt]=meshgrid((0:(mm-1))*fs/((mm-1)*2),(0:(length(f0raw)-1))/1000/shiftm);
%keyboard;
amx=interp2(ff,tt,ng,fmx,tmx);
gmx=interp2(ff,tt,gd,fmx,tmx);

function gdm=gdmap(n3sgram,fs)
% gdm=gdmap(n3sgram,fs)
% Calculates a group delay map from a smoothed time-frequency
% representation.
%
%   n3sgram : spectrogram, one column per frame; the rows are taken as the
%             bins 0..fftl/2 of an FFT of length fftl=(rows-1)*2
%   fs      : sampling frequency (Hz)
%   gdm     : map of the same size as n3sgram
%
% Each column is mirrored into a full-length spectrum, its cepstrum is
% folded onto the causal side, and the group delay is obtained by
% differencing the imaginary part of the resulting log spectrum along
% frequency.

% Designed and coded by Hideki Kawahara
% 7/Sept./2003

[nBins,nFrames]=size(n3sgram);
fftl=(nBins-1)*2;

mirrorIndex=fftl/2:-1:2;   % bins that rebuild the upper half of the spectrum
gdm=zeros(nBins,nFrames);
for iFrame=1:nFrames
    fullSpectrum=[n3sgram(:,iFrame);n3sgram(mirrorIndex,iFrame)];
    cepstrum=real(fft(log(fullSpectrum)));
    % keep c(0), double the first half, zero the second half
    foldedCepstrum=[cepstrum(1);2*cepstrum(2:fftl/2);0*cepstrum(fftl/2+1:fftl)];
    negLogSpectrum=-ifft(foldedCepstrum);
    rawDelay=-diff(imag(negLogSpectrum)/(2*pi*fs/fftl));
    % diff shortens the vector by one; the first value is repeated for bin 0
    gdm(:,iFrame)=[rawDelay(1);rawDelay(1:fftl/2)];
end;

--------------------------------------------------------------------------------
/src/exSinStraightSynthBU2.m:
--------------------------------------------------------------------------------
function [sy,prmS] = exSinStraightSynth(f0raw,n3sgram,fs,optionalParamsS)
% STRAIGHT synthesis based on sinusoidal plus noise model
% [sy,prmS] = exSinStraightSynth(f0raw,n3sgram,fs,optionalParamsS)
% Input
%   f0raw : fundamental frequency (Hz), one value per frame
%   n3sgram : STRAIGHT spectrogram (one column per frame)
%   fs : sampling frequency
%   optionalParamsS : optional parameters (struct, any subset of the fields)
%       spectralUpdateInterval : frame rate (ms)
%       initialPhase : initial phase of sinusoids (radian)
%       initialAmplitude : amplitude weight of each sinusoid
%       lowestF0 : lowest F0 of the synthesized speech (Hz); it only
%           sizes the default initialPhase/initialAmplitude vectors
%       minimumPhase : weight of the minimum phase term (0: zero phase)
% Output
%   sy : synthesized speech waveform
%   prmS : parameters used in synthesis
%
% When initialPhase or initialAmplitude is shorter than the number of
% sinusoids, its last element is reused for the remaining sinusoids.

% Originally coded when visiting CNBH on 2003
% Revised by Hideki Kawahara
% 10/December/2005 by Hideki Kawahara

switch nargin
    case 3
        prmS = zinitializeParameters(fs);
    case 4
        prmS = replaceSuppliedParameters(fs,optionalParamsS);
    otherwise
        % without this branch prmS stays undefined and the failure shows
        % up later as an unrelated "undefined variable" error
        error('exSinStraightSynth requires 3 or 4 input arguments.');
end;
shiftm = prmS.spectralUpdateInterval;
initialPhase = prmS.initialPhase;
initialAmplitude = prmS.initialAmplitude;
minimumPhase = prmS.minimumPhase;

%[groupDelayMap,cdm]=spectrum2GroupDelay(n3sgram,fs);
cdm = spectrum2minimumphase(n3sgram,fs);
[amx,fmx,cmx] = sinucompgd(f0raw,fs,n3sgram,cdm,shiftm);
% samples that fall outside the interpolation grid come back as NaN
amx(isnan(amx)) = 0;
%gmx(isnan(gmx))=0;
cmx(isnan(cmx)) = 0;

deltaPhase = 2*pi*fmx/fs;            % phase advance per sample
%phaseDeviation = -2*pi*gmx.*fmx*minimumPhase;
phaseDeviation = cmx*minimumPhase;
[nTime,nFrequency] = size(deltaPhase);

% Clamp the indices with the length of the vector that is actually being
% indexed. The amplitude vector used to be clamped with the length of the
% phase vector, which fails when the two vectors differ in length.
phaseIndex = min(length(initialPhase),1:nFrequency);
amplitudeIndex = min(length(initialAmplitude),1:nFrequency);
deltaPhase(1,:) = initialPhase(phaseIndex)+deltaPhase(1,:);
amx = amx*diag(initialAmplitude(amplitudeIndex));

% sum of all sinusoids; the running phase is the cumulative sum over time
sy = sum(real(amx.*exp(1i*(cumsum(deltaPhase)+phaseDeviation)))');

%%%---- internal functions
function prmS = zinitializeParameters(fs);
% Default synthesis parameters. The per-sinusoid vectors are sized for the
% default lowest F0.
prmS.spectralUpdateInterval = 1; %shiftm=1; % default frame shift (ms) for spectrogram
prmS.lowestF0 = 50; % compatible default is 50 Hz
prmS.initialPhase = zeros(1,ceil(fs/prmS.lowestF0/2));
prmS.initialAmplitude = ones(1,ceil(fs/prmS.lowestF0/2));
prmS.minimumPhase = 0; % default is zero phase
return;

%%%----
function prmS = replaceSuppliedParameters(fs,prmin);
% Defaults overridden by every known field that is present in prmin.
% Fields of prmin that are not default parameters are ignored.
prmS = zinitializeParameters(fs);
knownFields = fieldnames(prmS);
for iField = 1:length(knownFields)
    fieldName = knownFields{iField};
    if isfield(prmin,fieldName)
        prmS.(fieldName) = prmin.(fieldName);
    end;
end;
return;

%%%----
function [amx,fmx,cmx]= sinucompgd(f0raw,fs,n3sgram,cdm,shiftm)

% [amx,fmx,cmx]=sinucompgd(f0raw,fs,n3sgram,cdm,shiftm)
% program to generate matrix for sinusoidal synthesis
%
%   amx : amplitude of each sinusoid at each sample (samples x sinusoids)
%   fmx : frequency of each sinusoid at each sample (Hz)
%   cmx : value of the map cdm sampled at the same time/frequency points
%
% Column k of the outputs belongs to the frequency (k-1)*F0, so the first
% column is the zero-frequency term.

% Designed and Coded by Hideki Kawahara
% 07/Sept./2003

frameTimes=(0:length(f0raw)-1)/1000/shiftm;
t=0:1/fs:(length(f0raw)-1)/1000/shiftm;
f0i=interp1(frameTimes,f0raw,t)';
f0l=min(f0raw(f0raw>0));
ng=n3sgram';
ng(:,1) = ng(:,1)*0;    % remove the zero-frequency bin
%gd=gdm';
[nn,mm]=size(ng);

% ---- instantaneous frequency matrix ---
nh=ceil(fs/2/f0l);      % enough sinusoids to reach fs/2 at the lowest F0
fmx=f0i*(0:nh-1);
tmx=t'*ones(1,nh);

% ---- instantaneous amplitude matrix ---
[ff,tt]=meshgrid((0:(mm-1))*fs/((mm-1)*2),frameTimes);
%keyboard;
amx=interp2(ff,tt,ng,fmx,tmx,'*linear');
%gmx=interp2(ff,tt,gd,fmx,tmx,'*linear');
% The phase map is laid out like ng (time along rows, frequency along
% columns), so it is sampled on the same grid. It used to be called with
% the frequency grid for both axes.
cmx=interp2(ff,tt,cdm',fmx,tmx,'*linear');
return;

%%%---
function cph=spectrum2minimumphase(n3sgram,fs)
% cph=spectrum2minimumphase(n3sgram,fs)
% Calculates, for every frame (column) of a smoothed time-frequency
% representation, the negated imaginary part of the log spectrum obtained
% after folding the cepstrum onto the causal side.
% The argument fs is not referenced by the active code.

% Designed and coded by Hideki Kawahara
% 7/Sept./2003

[nRow,nColumn]=size(n3sgram);
fftl=(nRow-1)*2;

reversedIndex=fftl/2:-1:2;
%gdm=zeros(nRow,nColumn);
cph=zeros(nRow,nColumn);
for iFrame=1:nColumn
    dftSegment=[n3sgram(:,iFrame);n3sgram(reversedIndex,iFrame)];
    complexCepstrum=real(fft(log(dftSegment)));
    causalCepstrum=[complexCepstrum(1); ...
        2*complexCepstrum(2:fftl/2); ...
        0*complexCepstrum(fftl/2+1:fftl)];
    causalLogSpectrum=ifft(causalCepstrum);
    %    rawGroupDelay=-diff(-imag(causalLogSpectrum)/(2*pi*fs/fftl));
    %    gdm(:,ii)=[rawGroupDelay(1);rawGroupDelay(1:fftl/2)];
    cph(:,iFrame)=-imag(causalLogSpectrum(1:fftl/2+1));
end;

--------------------------------------------------------------------------------
/src/exstraightAPind.m:
--------------------------------------------------------------------------------
function [ap,analysisParams]=exstraightAPind(x,fs,f0,optionalParams)
% Aperiodicity index extraction for STRAIGHT
% [ap,analysisParams]=exstraightAPind(x,fs,f0,optionalParams)
% Input parameters
%   x : input signal. if it is multi channel, only the first channel is used
%   fs : sampling frequency (Hz)
%   f0 : fundamental frequency (Hz)
%   optionalParams : Optional parameters for analysis
% Output parameters
%   ap : amount of aperiodic component in the time frequency representation
%       : represented in dB
%   analysisParams : Analysis parameters actually used
%
% Usage:
%   Case 1: The simplest method
%   ap=exstraightAPind(x,fs,f0);
%   Case 2: You can get to know what parameters were used.
%   [ap,analysisParams]=exstraightAPind(x,fs,f0);
%   Case 3: You can have full control of STRAIGHT synthesis.
%       Please use case 2 to find desired parameters to modify.
%   [ap,analysisParams]=exstraightAPind(x,fs,f0,optionalParams);

% Notes on programming style
% This routine is based on the current (2005.1.31) implementation of
% STRAIGHT that consists of many legacy fragments. They were intentionally
% kept for maintaining historic record. Revised functions written in a
% reasonable stylistic practice will be made available soon.

% Designed and coded by Hideki Kawahara
% 15/January/2005
% 01/February/2005 extended for user control
% 13/March/2005 Aperiodicity index extraction part is isolated
% 30/April/2005 modification for Matlab v7.0 compatibility
% 11/Sept./2005 waitbar control is fixed.
% 05/July/2006 default values are modified, framem

%---Check for number of input parameters
switch nargin
    case 3
        prm=zinitializeParameters;
    case 4
        prm=replaceSuppliedParameters(optionalParams);
    otherwise
        disp('Number of arguments is 3 or 4!');
        return;
end

%---Unpack the analysis parameters
f0ceil = prm.F0searchUpperBound;        % upper bound of the F0 search (Hz)
framem = prm.F0defaultWindowLength;     % frame length for pitch extraction (ms)
f0shiftm = prm.F0frameUpdateInterval;   % F0 calculation interval (ms)
imageOn = prm.DisplayPlots;             % image display indicator (1: display image)

% parameters for F0 refinement
fftlf0r = prm.refineFftLength;                  % FFT length for F0 refinement
tstretch = prm.refineTimeStretchingFactor;      % time window stretching factor
nhmx = prm.refineNumberofHarmonicComponent;     % number of harmonic components for F0 refinement
iPeriodicityInterval = prm.periodicityFrameUpdateInterval; % frame update interval for periodicity index (ms)

%---FFT length: 1024 unless the analysis frame is longer, in which case the
%   next power of two that holds the frame is used
fftl=1024;
framel=framem*fs/1000;
if fftl < framel
    fftl=2^ceil(log(framel)/log(2));
end;

%---Use the first channel only, as a column vector
[nr,nc]=size(x);
if nr>nc
    x=x(:,1);
else
    x=x(1,:)';
end;

%---- F0 refinement
nstp=1; % start position of F0 refinement (samples)
nedp=length(f0); % last position of F0 refinement (samples)
% decimation factor, fix by H.K. at 28/Jan./2003
% The floor() evaluates to 0 when fs is below 6*f0ceil, which decimate
% rejects, so the factor is kept at 1 or more.
dn=max(1,floor(fs/(f0ceil*3*2)));
[f0raw,ecr]=refineF06(decimate(x,dn),fs/dn,f0,fftlf0r,tstretch,nhmx,f0shiftm,nstp,nedp,imageOn); % 31/Aug./2004

% second output of refineF06, blanked where the refined F0 is zero
ecrt=ecr;
ecrt(f0raw==0)=NaN;

%----- aperiodicity estimation
% From here on the F0 supplied by the caller is used, not the refined one.
f0raw=f0;
[apvq,dpvq,~,~]=aperiodicpartERB2(x,fs,f0raw,f0shiftm,iPeriodicityInterval,fftl/2+1,imageOn); % 10/April/2002
apv=10*log10(apvq); % for compatibility
dpv=10*log10(dpvq); % for compatibility

%- ---------
%	Notes on aperiodicity estimation: The previous implementation of
%	aperiodicity estimation was sensitive to low frequency noise. It is
%	bad news, because environmental noise usually has its power in the low
%	frequency region. The following correction uses the C/N information
%	which is the byproduct of fixed point based F0 estimation.
%	by H.K. 04/Feb./2003
%- ---------
dpv=correctdpv(apv,dpv,iPeriodicityInterval,f0raw,ecrt,f0shiftm,fs); % Aperiodicity correction 04/Feb./2003 by H.K.

if imageOn
    bv=boundmes2(apv,dpv,fs,f0shiftm,iPeriodicityInterval,fftl/2+1);
    figure;
    semilogy((0:length(bv)-1)*f0shiftm,0.5./10.0.^(bv));grid on;
    set(gcf,'PaperPosition', [0.634517 0.634517 19.715 28.4084]);
end;

ap=aperiodiccomp(apv,dpv,iPeriodicityInterval,f0raw,f0shiftm,imageOn); % 11/Sept./2005

% Always assigned; a call without output variables no longer triggers the
% "Number of output parameters" message.
analysisParams=prm;
end

%%%---- internal functions

%%%------
function prm=zinitializeParameters
% Default analysis parameters.
prm.F0searchLowerBound=40; % f0floor
prm.F0searchUpperBound=800; % f0ceil
prm.F0defaultWindowLength = 80; % default frame length for pitch extraction (ms)
prm.F0frameUpdateInterval=1; % shiftm % F0 calculation interval (ms)
prm.NofChannelsInOctave=24; % nvo=24; % Number of channels in one octave
prm.IFWindowStretch=1.2; % mu=1.2; % window stretch from isometric window
prm.DisplayPlots=0; % imgi=1; % image display indicator (1: display image)
prm.IFsmoothingLengthRelToFc=1; % smp=1; % smoothing length relative to fc (ratio)
prm.IFminimumSmoothingLength=5; % minm=5; % minimum smoothing length (ms)
prm.IFexponentForNonlinearSum=0.5; % pc=0.5; % exponent to represent nonlinear summation
prm.IFnumberOfHarmonicForInitialEstimate=1; % nc=1; % number of harmonic component to use (1,2,3)
prm.refineFftLength=1024; %fftlf0r=1024; % FFT length for F0 refinement
prm.refineTimeStretchingFactor=1.1; %tstretch=1.1; % time window stretching factor
prm.refineNumberofHarmonicComponent=3; %nhmx=3; % number of harmonic components for F0 refinement
prm.periodicityFrameUpdateInterval=5; % frame update interval for periodicity index (ms)
prm.note=' '; % Any text to be printed on the source information plot
end

%%%--------
function prm=replaceSuppliedParameters(prmin)
% Defaults overridden by every known field that is present in prmin.
% Fields of prmin that are not default parameters are ignored.
prm=zinitializeParameters;
knownFields=fieldnames(prm);
for iField=1:length(knownFields)
    fieldName=knownFields{iField};
    if isfield(prmin,fieldName)
        prm.(fieldName)=prmin.(fieldName);
    end;
end;
end

--------------------------------------------------------------------------------
/src/exstraightsource.m:
--------------------------------------------------------------------------------
function [f0raw,ap,analysisParams]=exstraightsource(x,fs,optionalParams)
% Source information extraction for STRAIGHT
% [f0raw,ap,analysisParams]=exstraightsource(x,fs,optionalParams)
% Input parameters
%   x : input signal. if it is multi channel, only the first channel is used
%   fs : sampling frequency (Hz)
%   optionalParams : Optional parameters for analysis
% Output parameters
%   f0raw : fundamental frequency (Hz)
%   ap : amount of aperiodic component in the time frequency representation
%       : represented in dB
%   analysisParams : Analysis parameters actually used
%
% Usage:
%   Case 1: The simplest method
%   [f0raw,ap]=exstraightsource(x,fs);
%   Case 2: You can get to know what parameters were used.
%   [f0raw,ap,analysisParams]=exstraightsource(x,fs);
%   Case 3: You can have full control of STRAIGHT synthesis.
%       Please use case 2 to find desired parameters to modify.
%   [f0raw,ap,analysisParams]=exstraightsource(x,fs,optionalParams);
%
% When only one output is requested the function returns right after the
% F0 extraction and skips the aperiodicity estimation.

% Notes on programming style
% This routine is based on the current (2005.1.31) implementation of
% STRAIGHT that consists of many legacy fragments. They were intentionally
% kept for maintaining historic record. Revised functions written in a
% reasonable stylistic practice will be made available soon.

% Designed and coded by Hideki Kawahara
% 15/January/2005
% 01/February/2005 extended for user control
% 30/April/2005 modification for Matlab v7.0 compatibility

%---Check for number of input parameters
switch nargin
    case 2
        prm=zinitializeParameters;
    case 3
        prm=replaceSuppliedParameters(optionalParams);
    otherwise
        disp('Number of arguments is 2 or 3!');
        return;
end

%---Unpack the analysis parameters
f0floor = prm.F0searchLowerBound;       % lower bound of the F0 search (Hz)
f0ceil = prm.F0searchUpperBound;        % upper bound of the F0 search (Hz)
framem = prm.F0defaultWindowLength;     % frame length for pitch extraction (ms)
f0shiftm = prm.F0frameUpdateInterval;   % F0 calculation interval (ms)

nvo = prm.NofChannelsInOctave;          % Number of channels in one octave
mu = prm.IFWindowStretch;               % window stretch from isometric window
imageOn = prm.DisplayPlots;             % image display indicator (1: display image)
smp = prm.IFsmoothingLengthRelToFc;     % smoothing length relative to fc (ratio)
minsm = prm.IFminimumSmoothingLength;   % minimum smoothing length (ms)
pcf0 = prm.IFexponentForNonlinearSum;   % exponent to represent nonlinear summation
nh = prm.IFnumberOfHarmonicForInitialEstimate; % number of harmonic component to use (1,2,3)
fname = prm.note;                       % Any text to be printed on the source information plot

% parameters for F0 refinement
fftlf0r = prm.refineFftLength;                  % FFT length for F0 refinement
tstretch = prm.refineTimeStretchingFactor;      % time window stretching factor
nhmx = prm.refineNumberofHarmonicComponent;     % number of harmonic components for F0 refinement
iPeriodicityInterval = prm.periodicityFrameUpdateInterval; % frame update interval for periodicity index (ms)

%---FFT length: 1024 unless the analysis frame is longer, in which case the
%   next power of two that holds the frame is used
fftl=1024;
framel=framem*fs/1000;
if fftl < framel
    fftl=2^ceil(log(framel)/log(2));
end;

%---Use the first channel only, as a column vector
[nr,nc]=size(x);
if nr>nc
    x=x(:,1);
else
    x=x(1,:)';
end;

nvc=ceil(log(f0ceil/f0floor)/log(2)*nvo); % number of channels

%---- F0 extraction based on a fixed-point method in the frequency domain

[f0v,vrv,dfv,~,aav]=fixpF0VexMltpBG4(x,fs,f0floor,nvc,nvo,mu,imageOn,f0shiftm,smp,minsm,pcf0,nh);
if imageOn
    title([fname '  ' datestr(now,0)]);
    drawnow;
end;

%---- post processing for V/UV decision and F0 tracking
[pwt,pwh]=zplotcpower(x,fs,f0shiftm,imageOn);

[f0raw,irms,~,~]=f0track5(f0v,vrv,dfv,pwt,pwh,aav,f0shiftm,imageOn); % 11/Sept./2005
avf0=mean(f0raw(f0raw>0));   % NaN when no frame has a positive F0
f0t=f0raw;
f0t(f0t==0)=NaN;             % frames with zero F0 are not drawn
tt=1:length(f0t);

if imageOn
    subplot(615);plot(tt*f0shiftm,f0t,'g');grid on;
    if ~isnan(avf0)
        axis([1 max(tt)*f0shiftm ...
            min(avf0/sqrt(2),0.95*min(f0raw(f0raw>0)))  ...
            max(avf0*sqrt(2),1.05*max(f0raw(f0raw>0)))]);
    end;
    ylabel('F0 (Hz)');
    hold on;
end;

%---- F0 refinement
nstp=1; % start position of F0 refinement (samples)
nedp=length(f0raw); % last position of F0 refinement (samples)
% decimation factor, fix by H.K. at 28/Jan./2003
% The floor() evaluates to 0 when fs is below 6*f0ceil, which decimate
% rejects, so the factor is kept at 1 or more.
dn=max(1,floor(fs/(f0ceil*3*2)));
[f0raw,ecr]=refineF06(decimate(x,dn),fs/dn,f0raw,fftlf0r,tstretch,nhmx,f0shiftm,nstp,nedp,imageOn); % 31/Aug./2004% 11/Sept.2005

if imageOn
    f0t=f0raw;
    f0t(f0t==0)=NaN;
    tt=1:length(f0t);
    subplot(615);plot(tt*f0shiftm,f0t,'k');hold off;
    drawnow
end;
%----------- 31/July/1999
% second output of refineF06, blanked where the refined F0 is zero
ecrt=ecr;
ecrt(f0raw==0)=NaN;

if imageOn
    tirms=irms;
    tirms(f0raw==0)=NaN;
    tirms(f0raw>0)=-20*log10(tirms(f0raw>0));
    subplot(616);hrms=plot(tt*f0shiftm,tirms,'g',tt*f0shiftm,20*log10(ecrt),'r'); %31/July/1999
    set(hrms,'LineWidth',2);hold on
    plot(tt*f0shiftm,-10*log10(vrv),'k.');
    grid on;hold off
    axis([1 max(tt)*f0shiftm -10 60]);
    xlabel('time (ms)');ylabel('C/N (dB)');
    drawnow;
end;

%-------------------------------------------------------------------------------------
f0raw(f0raw<=0)=f0raw(f0raw<=0)*0; % safeguard 31/August/2004
f0raw(f0raw>f0ceil)=f0raw(f0raw>f0ceil)*0+f0ceil; % safeguard 31/August/2004

if nargout == 1; return; end;

%----- aperiodicity estimation
[apvq,dpvq,~,~]=aperiodicpartERB2(x,fs,f0raw,f0shiftm,iPeriodicityInterval,fftl/2+1,imageOn); % 10/April/2002$11/Sept./2005
apv=10*log10(apvq); % for compatibility
dpv=10*log10(dpvq); % for compatibility
%- ---------
%	Notes on aperiodicity estimation: The previous implementation of
%	aperiodicity estimation was sensitive to low frequency noise. It is
%	bad news, because environmental noise usually has its power in the low
%	frequency region. The following correction uses the C/N information
%	which is the byproduct of fixed point based F0 estimation.
%	by H.K. 04/Feb./2003
%- ---------
dpv=correctdpv(apv,dpv,iPeriodicityInterval,f0raw,ecrt,f0shiftm,fs); % Aperiodicity correction 04/Feb./2003 by H.K.
157 | 158 | if imageOn 159 | bv=boundmes2(apv,dpv,fs,f0shiftm,iPeriodicityInterval,fftl/2+1); 160 | figure; 161 | semilogy((0:length(bv)-1)*f0shiftm,0.5./10.0.^(bv));grid on; 162 | set(gcf,'PaperPosition', [0.634517 0.634517 19.715 28.4084]); 163 | end; 164 | 165 | ap=aperiodiccomp(apv,dpv,iPeriodicityInterval,f0raw,f0shiftm,imageOn); % 11/Sept./2005 166 | 167 | switch nargout 168 | case 2 169 | case 3 170 | analysisParams=prm; 171 | otherwise 172 | disp('Number of output parameters has to be 2 or 3!') 173 | end; 174 | 175 | end 176 | 177 | %%%---- internal functions 178 | function [pw,pwh]=zplotcpower(x,fs,shiftm,imageOn) 179 | 180 | flm=8; % 01/August/1999 181 | fl=round(flm*fs/1000); 182 | w=hanning(2*fl+1); 183 | w=w/sum(w); 184 | nn=length(x); 185 | 186 | flpm=40; 187 | flp=round(flpm*fs/1000); 188 | wlp=fir1(flp*2,70/(fs/2)); 189 | wlp(flp+1)=wlp(flp+1)-1; 190 | wlp=-wlp; 191 | 192 | tx=[x(:)' zeros(1,2*length(wlp))]; 193 | ttx=fftfilt(wlp,tx); 194 | ttx=ttx((1:nn)+flp); 195 | tx=[ttx(:)' zeros(1,2*length(w))]; 196 | 197 | pw=fftfilt(w,tx.^2); 198 | pw=pw((1:nn)+fl); 199 | mpw=max(pw); 200 | pw=pw(round(1:shiftm*fs/1000:nn)); 201 | pw(pw3kHz) '); 223 | end; 224 | end 225 | 226 | %%%------ 227 | function prm=zinitializeParameters 228 | prm.F0searchLowerBound=40; % f0floor 229 | prm.F0searchUpperBound=800; % f0ceil 230 | prm.F0defaultWindowLength = 80; % default frame length for pitch extraction (ms) 231 | prm.F0frameUpdateInterval=1; % shiftm % F0 calculation interval (ms) 232 | prm.NofChannelsInOctave=24; % nvo=24; % Number of channels in one octave 233 | prm.IFWindowStretch=1.2; % mu=1.2; % window stretch from isometric window 234 | prm.DisplayPlots=0; % imgi=1; % image display indicator (1: display image) 235 | prm.IFsmoothingLengthRelToFc=1; % smp=1; % smoothing length relative to fc (ratio) 236 | prm.IFminimumSmoothingLength=5; % minm=5; % minimum smoothing length (ms) 237 | prm.IFexponentForNonlinearSum=0.5; % pc=0.5; % exponent to represent nonlinear 
summation 238 | prm.IFnumberOfHarmonicForInitialEstimate=1; % nc=1; % number of harmonic component to use (1,2,3) 239 | prm.refineFftLength=1024; %fftlf0r=1024; % FFT length for F0 refinement 240 | prm.refineTimeStretchingFactor=1.1; %tstretch=1.1; % time window stretching factor 241 | prm.refineNumberofHarmonicComponent=3; %nhmx=3; % number of harmonic components for F0 refinement 242 | prm.periodicityFrameUpdateInterval=5; % frame update interval for periodicity index (ms)return 243 | prm.note=' '; % Any text to be printed on the source information plot 244 | end 245 | 246 | %%%-------- 247 | function prm=replaceSuppliedParameters(prmin) 248 | prm=zinitializeParameters; 249 | if isfield(prmin,'F0searchLowerBound')==1; 250 | prm.F0searchLowerBound=prmin.F0searchLowerBound;end; 251 | if isfield(prmin,'F0searchUpperBound')==1; 252 | prm.F0searchUpperBound=prmin.F0searchUpperBound;end; 253 | if isfield(prmin,'F0defaultWindowLength')==1; 254 | prm.F0defaultWindowLength=prmin.F0defaultWindowLength;end; 255 | if isfield(prmin,'F0frameUpdateInterval')==1; 256 | prm.F0frameUpdateInterval=prmin.F0frameUpdateInterval;end; 257 | if isfield(prmin,'NofChannelsInOctave')==1; 258 | prm.NofChannelsInOctave=prmin.NofChannelsInOctave;end; 259 | if isfield(prmin,'IFWindowStretch')==1; 260 | prm.IFWindowStretch=prmin.IFWindowStretch;end; 261 | if isfield(prmin,'DisplayPlots')==1; 262 | prm.DisplayPlots=prmin.DisplayPlots;end; 263 | if isfield(prmin,'IFsmoothingLengthRelToFc')==1; 264 | prm.IFsmoothingLengthRelToFc=prmin.IFsmoothingLengthRelToFc;end; 265 | if isfield(prmin,'IFminimumSmoothingLength')==1; 266 | prm.IFminimumSmoothingLength=prmin.IFminimumSmoothingLength;end; 267 | if isfield(prmin,'IFexponentForNonlinearSum')==1; 268 | prm.IFexponentForNonlinearSum=prmin.IFexponentForNonlinearSum;end; 269 | if isfield(prmin,'IFnumberOfHarmonicForInitialEstimate')==1; 270 | prm.IFnumberOfHarmonicForInitialEstimate=prmin.IFnumberOfHarmonicForInitialEstimate;end; 271 | if 
isfield(prmin,'refineFftLength')==1; 272 | prm.refineFftLength=prmin.refineFftLength;end; 273 | if isfield(prmin,'refineTimeStretchingFactor')==1; 274 | prm.refineTimeStretchingFactor=prmin.refineTimeStretchingFactor;end; 275 | if isfield(prmin,'refineNumberofHarmonicComponent')==1; 276 | prm.refineNumberofHarmonicComponent=prmin.refineNumberofHarmonicComponent;end; 277 | if isfield(prmin,'periodicityFrameUpdateInterval')==1; 278 | prm.periodicityFrameUpdateInterval=prmin.periodicityFrameUpdateInterval;end; 279 | if isfield(prmin,'note')==1; 280 | prm.note=prmin.note;end; 281 | end 282 | -------------------------------------------------------------------------------- /src/exstraightspec.m: -------------------------------------------------------------------------------- 1 | function [n3sgram,analysisParamsSp]=exstraightspec(x,f0raw,fs,optionalParamsSP) 2 | % Spectral information extraction for STRAIGHT 3 | % [n3sgram,nalysisParamsSp]=exstraightspec(x,f0raw,fs,optionalParamsSP) 4 | % Input parameters 5 | % x : input signal. only the first channel is analyzed 6 | % f0raw : fundamental frequency (Hz) in 1 ms temporal resolution 7 | % : set 0 for aperiodic part 8 | % fs : sampling freuency (Hz) 9 | % optionalParamsSP : spectrum analysis parameters 10 | % Output parameters 11 | % n3sgram : Smoothed time frequency representation (spectrogram) 12 | % analysisParamsSp : Actually used parameters 13 | % 14 | % Usage: 15 | % Case 1: The simplest method 16 | % n3sgram = exstraightspec(x,f0raw,fs); 17 | % Case 2: You can get to know what parameters were used. 18 | % [n3sgram,analysisParamsSp]=exstraightspec(x,f0raw,fs); 19 | % CAse 3: You can have full control of STRAIGHT synthesis. 20 | % Please use case 2 to find desired parameters to modify. 21 | % [n3sgram,analysisParamsSp]=exstraightspec(x,f0raw,fs,optionalParamsSP); 22 | 23 | 24 | % Designed and coded by Hideki Kawahara 25 | % 15/January/2005 26 | % 01/February/2005 27 | % 11/Sept./2005 waitbar control is fixed. 
%	05/July/2006 default values are modified, eta, framem

%---Check for number of input parameters
switch nargin
    case 3
        prm=zinitializeParameters;
    case 4
        prm=replaceSuppliedParameters(optionalParamsSP);
    otherwise
        % this function takes 3 or 4 arguments; the message used to say 2 or 3
        disp('Number of arguments is 3 or 4!');
        return;
end

% Initialize parameters
imageOn = prm.DisplayPlots;                     % Display indicator. 0: No graphics, 1: Show graphics
framem = prm.defaultFrameLength;                % frame length (ms)
shiftm = prm.spectralUpdateInterval;            % frame shift (ms) for spectrogram
eta = prm.spectralTimeWindowStretch;            % time window stretch factor
pc = prm.spectralExponentForNonlinearity;       % exponent for nonlinearity
mag = prm.spectralTimeDomainCompensation;       % This parameter should be revised.

% FFT length: 1024 unless the analysis frame is longer, in which case the
% next power of two that holds the frame is used
framel=framem*fs/1000;
fftl=1024;
if fftl < framel
    fftl=2^ceil(log(framel)/log(2));
end;

% only the first channel is analyzed, as a column vector
[nr,nc]=size(x);
if nr>nc
    xold=x(:,1);
else
    xold=x(1,:)';
end;

%---- Spectral estimation

% The signal is rescaled to a fixed level before the analysis and the
% result is scaled back afterwards.
xamp=std(xold);
scaleconst=2200; % magic number for compatibility 15/Jan./2005
xold=xold/xamp*scaleconst;
f0var=1; f0varL=1; % These are obsolete dummy variables. meaningless
[n2sgrambk,~]=straightBodyC03ma(xold,fs,shiftm,fftl,f0raw,f0var,f0varL,eta,pc,imageOn); % 11/Sept./2005
if mag>0
    n3sgram=specreshape(fs,n2sgrambk,eta,pc,mag,f0raw,imageOn); % 11/Sept./2005
else
    n3sgram=n2sgrambk;
end;
n3sgram=n3sgram/scaleconst*xamp;
analysisParamsSp = prm;
return;

%%%--- Internal functions
function prm=zinitializeParameters
% Default spectrum analysis parameters.
prm.DisplayPlots = 0; % Display indicator. 0: No graphics, 1: Show graphics
prm.defaultFrameLength = 80; % default frame length for pitch extraction (ms); was 40 before 05/July/2006
prm.spectralUpdateInterval = 1; % default frame shift (ms) for spectrogram
prm.spectralTimeWindowStretch = 1.0; % time window stretch factor; was 1.4 before 05/July/2006
prm.spectralExponentForNonlinearity = 0.6; % exponent for nonlinearity
prm.spectralTimeDomainCompensation = 0.2; % This parameter should be revised.

%%%----
function prm=replaceSuppliedParameters(prmin)
% Defaults overridden by every known field that is present in prmin.
% Fields of prmin that are not default parameters are ignored.
prm=zinitializeParameters;
knownFields=fieldnames(prm);
for iField=1:length(knownFields)
    fieldName=knownFields{iField};
    if isfield(prmin,fieldName)
        prm.(fieldName)=prmin.(fieldName);
    end;
end;
return;
--------------------------------------------------------------------------------
/src/exstraightsynth.m:
--------------------------------------------------------------------------------
function [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs,optionalParamsS)
% Synthesis using STRAIGHT parameters with linear modifications
% [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs,optionalParamsS)
% Input parameters
%   f0raw : fundamental frequency (Hz)
%   n3sgram : STRAIGHT spectrogram (in absolute value)
%   ap : aperiodic component (dB re. to total power)
%   fs : sampling frequency (Hz)
%   optionalParamsS : optional synthesis parameters
% Output parameters
%   sy : synthesized speech
%   prmS : Actually used synthesis parameters; the field statusReport
%       holds the second output of the synthesis routine
%
% Usage:
%   Case 1: The simplest method
%   sy = exstraightsynth(f0raw,n3sgram,ap,fs);
%   Case 2: You can get to know what parameters were used.
%   [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs);
%   Case 3: You can have full control of STRAIGHT synthesis.
%       Please use case 2 to find desired parameters to modify.
%   [sy,prmS] = exstraightsynth(f0raw,n3sgram,ap,fs,optionalParamsS);

% Designed and coded by Hideki Kawahara
% 15/January/2005
% 01/February/2005 revised for generalization
% 14/February/2005 fixed typo
% 30/April/2005 modification for Matlab v7.0 compatibility
% 11/Sept./2005 display indicator field is defined.
% 27/Nov./2005 enabled setting lower limit of F0
% 03/July/2015 refactored for MATLAB R2016a and Octave

%---- Check input parameters
switch nargin
    case 4
        prmS=zinitializeParameters;
    case 5
        prmS=replaceSuppliedParameters(optionalParamsS);
    otherwise
        disp('Number of arguments is not relevant. Type help exstraightsynth.');
        return
end;

%--- Initialize parameters
shiftm = prmS.spectralUpdateInterval;               % frame shift (ms) for spectrogram
delsp = prmS.groupDelayStandardDeviation;           % standard deviation of random group delay in ms
gdbw = prmS.groupDelaySpatialBandWidth;             % smoothing window length of random group delay (in Hz)
cornf = prmS.groupDelayRandomizeCornerFrequency;    % corner frequency for random phase (Hz)
delfrac = prmS.ratioToFundamentalPeriod;            % Fractional group delay (ratio)
delfracind = prmS.ratioModeIndicator;               % Use fractional group delay, if this is set 1.
normalizedOut = prmS.levelNormalizationIndicator;   % Normalize voiced part level, when this is set 1.
headRoom = prmS.headRoomToClip;                     % Head room from voiced part rms to the clipping level. (dB)
lsegment = prmS.powerCheckSegmentLength;            % Segment length for voiced power check (ms)
imap = prmS.timeAxisMappingTable;                   % imap = 1 represents identity mapping.
pconv = prmS.fundamentalFrequencyMappingTable;      % pconv = 1 represents identity mapping.
fconv = prmS.frequencyAxisMappingTable;             % fconv = 1 represents identity mapping.
sconv = prmS.timeAxisStretchingFactor;              % This is a simple coefficient.
imgi = prmS.DisplayPlots;                           % default 0, 1: display on
lowestF0 = prmS.lowestF0;                           % compatible default is 50 Hz

[sy,statusReport] =straightSynthTB07ca(n3sgram,f0raw,shiftm,fs, ...
    pconv,fconv,sconv,gdbw,delfrac,delsp,cornf,delfracind,ap,imap,imgi,lowestF0); % revised 27/Nov./2005
if normalizedOut
    % scale so that the measured level sits headRoom dB below 20*log10(32768)
    dBsy=zpowerchk(sy,fs,lsegment); % 23/Sept./1999
    cf=(20*log10(32768)-headRoom)-dBsy;
    sy=sy*(10.0.^(cf/20));
end;
prmS.statusReport = statusReport;
end

%%%----- Internal functions
function pow=zpowerchk(x,fs,segms)
% Calculate average power of voiced portion
% pow=powerchk(x,fs,segms)
%   x : signal
%   fs : sampling frequency (Hz)
%   segms : segment length (ms)
%   pow : level in dB, averaged over the segments whose power exceeds
%       1/30 of the mean segment power
%
% When the signal length is not a multiple of the segment length, the last
% segment is filled with small random values, so the result can differ
% slightly from call to call.

% 23/Sept./1999 updated

sig=x(:);
sig(isnan(sig))=0.0000000001;   % NaN samples are replaced by a tiny constant
energy=sig.*sig;
n=round(segms/1000*fs); % 23/Sept./1999
nw=ceil(length(x)/n);
if rem(length(x),n)>0
    energy=[energy;0.000001*randn(n*nw-length(x),1).^2]; % 23/Sept./1999
end;
energy(energy==0)=0.000001;     % keeps log10 away from zero

segmentPower=sum(reshape(energy,n,nw))/n;

pow=10*log10(mean(segmentPower(segmentPower>(mean(segmentPower)/30))));
end

%%%---- Initialize parameters
function prm=zinitializeParameters
% Default synthesis parameters.
prm.spectralUpdateInterval = 1; %shiftm=1; % default frame shift (ms) for spectrogram
prm.groupDelayStandardDeviation = 0.5; %delsp=0.5; % standard deviation of random group delay in ms
prm.groupDelaySpatialBandWidth = 70; %gdbw=70; % smoothing window length of random group delay (in Hz)
prm.groupDelayRandomizeCornerFrequency = 4000; %cornf=4000; % corner frequency for random phase (Hz)
prm.ratioToFundamentalPeriod = 0.2; %delfrac=0.2; % Fractional group delay (ratio)
prm.ratioModeIndicator = 0; %delfracind=0; % Use fractional group delay, if this is set 1.
prm.levelNormalizationIndicator = 1; %normalizedOut = 1; % Normalize voiced part level, when this is set 1.
prm.headRoomToClip = 22; %headRoom = 22; % Head room from voiced part rms to the clipping level. (dB)
prm.powerCheckSegmentLength = 15; %lsegment = 15; % Segment length for voiced power check (ms)
prm.timeAxisMappingTable = 1; % imap = 1 represents identity mapping.
prm.fundamentalFrequencyMappingTable = 1; %pconv = 1 represents identity mapping.
prm.frequencyAxisMappingTable = 1; %fconv = 1 represents identity mapping.
prm.timeAxisStretchingFactor = 1; %sconv = 1; % This is a simple coefficient.
prm.DisplayPlots = 0; % default 0, 1: display on
prm.lowestF0 = 50; % default that was not as same as the previous version but consistent
end

%%%----
function prm=replaceSuppliedParameters(prmin)
% Defaults overridden by every known field that is present in prmin.
% Fields of prmin that are not default parameters are ignored.
prm=zinitializeParameters;
knownFields=fieldnames(prm);
for iField=1:length(knownFields)
    fieldName=knownFields{iField};
    if isfield(prmin,fieldName)
        prm.(fieldName)=prmin.(fieldName);
    end;
end;
end
--------------------------------------------------------------------------------
/src/f0track5.m:
--------------------------------------------------------------------------------
function [f0,irms,df,amp]=f0track5(f0v,vrv,dfv,pwt,pwh,aav,shiftm,imgi)

%	F0 trajectory tracker
%	[f0,irms,df,amp]=f0track2(f0v,vrv,dfv,shiftm,imgi)
%		f0	: extracted F0 (Hz)
%		irms	: relative interfering energy in rms
%
%	f0v	: fixed point frequency vector
%	vrv	: relative interfering energy vector
%	dfv	: fixed point slope vector
%	pwt	: total power
%	pwh	: power in higher frequency range
%	aav	: amplitude list for fixed points
%	shiftm	: frame update period (ms)
%	imgi	: display indicator, 1: display on (default), 0: off
%
%	This is a very primitive and conventional algorithm.

%	coded by Hideki Kawahara
%	copyright(c) Wakayama University/CREST/ATR
%	10/April/1999 first version
%	17/May/1999 relative fq jump thresholding
%	01/August/1999 parameter tweeking
%	07/Dec./2002 waitbar was added
%	13/Jan./2005 bug fix on lines 58, 97 (Thanx Ishikasa-san)
%	30/April/2005 modification for Matlab v7.0 compatibility
%	10/Aug./2005 modified by Takahashi on waitbar
%	10/Sept./2005 modified by Kawahara on waitbar

if nargin==7; imgi=1; end; %10/Sept./2005
vrv=sqrt(vrv);
[~,mm]=size(vrv);
mm=min(mm,length(pwt));

f0=zeros(1,mm);
irms=ones(1,mm);
df=ones(1,mm);
amp=zeros(1,mm);
von=0;
[mxvr,ixx]=min(vrv);
hth=0.12; % highly confident voiced threshould (updated on 01/August/1999)
lth=0.9;  % threshold to loose confidence
bklm=100;	% back track length for voicing decision
lalm=10;	% look ahead length for silence decision
bkls=bklm/shiftm;
lals=lalm/shiftm;
htr=10*log10(pwh./pwt); 48 | 49 | thf0j=0.04*sqrt(shiftm); % 4 % of F0 is the limit of jump 50 | ii=1; 51 | f0ref=0; 52 | htrth=-2.0; % was -3 mod 2002.6.3 53 | if imgi==1; hpg=waitbar(0,'F0 tracking'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 54 | while ii < mm+1 55 | if (von == 0) && (mxvr(ii)10000)+(f0v(jxx,jj)>10000); 61 | if (((gomi>thf0j) || (vrv(jxx,jj)>lth) || (htr(jj)>htrth))&&(f0v(jxx,jj)<1000)) && htr(jj)>-18 62 | % disp(['break pt1 at ' num2str(jj)]) 63 | break 64 | end; 65 | if (gomi>thf0j) 66 | % disp(['break pt2 at ' num2str(jj)]) 67 | break 68 | end; 69 | 70 | f0(jj)=f0v(jxx,jj); 71 | irms(jj)=vrv(jxx,jj); 72 | df(jj)=dfv(jxx,jj); 73 | amp(jj)=aav(jxx,jj); 74 | f0ref=f0(jj); 75 | end; 76 | f0ref=f0v(ixx(ii),ii); 77 | end; 78 | if (f0ref>0) && (f0ref<10000) 79 | [gomi,jxx]=min(abs((f0v(:,ii)-f0ref)/f0ref)); 80 | else 81 | gomi=10; 82 | end; 83 | if (von ==1) && (mxvr(ii)>hth) 84 | for jj=ii:min(mm,ii+lals) 85 | ii=jj; 86 | [gomi,jxx]=min(abs((f0v(:,ii)-f0ref)/f0ref)); 87 | gomi=gomi+(f0ref>10000)+(f0v(jxx,ii)>10000); 88 | if (gomi< thf0j) && ((htr(ii)=1000)) 89 | f0(ii)=f0v(jxx,ii); 90 | irms(ii)=vrv(jxx,ii); 91 | df(ii)=dfv(jxx,ii); 92 | amp(ii)=aav(jxx,ii); 93 | f0ref=f0(ii); 94 | end; 95 | if (gomi>thf0j) || (vrv(jxx,ii)>lth) || ((htr(ii)>htrth)&&(f0v(jxx,ii)<1000)) 96 | von = 0;f0ref=0; 97 | break 98 | end; 99 | end; 100 | elseif (von==1) && (gomi < thf0j) && ((htr(ii)=1000)) 101 | f0(ii)=f0v(jxx,ii); 102 | irms(ii)=vrv(jxx,ii); 103 | df(ii)=dfv(jxx,ii); 104 | amp(ii)=aav(jxx,ii); 105 | f0ref=f0(ii); 106 | else 107 | von=0; 108 | end; 109 | if imgi==1; waitbar(ii/mm); end; %,hpg); % 07/Dec./2002 by H.K.%10/Aug./2005 110 | ii=ii+1; 111 | end; 112 | if imgi==1; close(hpg); end;%10/Aug./2005 113 | -------------------------------------------------------------------------------- /src/fixpF0VexMltpBG4.m: -------------------------------------------------------------------------------- 1 | function 
[f0v,vrv,dfv,nf,aav]=fixpF0VexMltpBG4(x,fs,f0floor,nvc,nvo,mu,imgi,shiftm,smp,minm,pc,nc) 2 | 3 | % Fixed point analysis to extract F0 4 | % [f0v,vrv,dfv,nf]=fixpF0VexMltpBG4(x,fs,f0floor,nvc,nvo,mu,imgi,shiftm,smp,minm,pc,nc) 5 | % x : input signal 6 | % fs : sampling frequency (Hz) 7 | % f0floor : lowest frequency for F0 search 8 | % nvc : total number of filter channels 9 | % nvo : number of channels per octave 10 | % mu : temporal stretching factor 11 | % imgi : image display indicator (1: display image, default) 12 | % shiftm : frame shift in ms 13 | % smp : smoothing length relative to fc (ratio) 14 | % minm : minimum smoothing length (ms) 15 | % pc : exponent to represent nonlinear summation 16 | % nc : number of harmonic component to use (1,2,3) 17 | 18 | % Designed and coded by Hideki Kawahara 19 | % 28/March/1999 20 | % 04/April/1999 revised to multi component version 21 | % 07/April/1999 bi-reciprocal smoothing for multi component compensation 22 | % 01/May/1999 first derivative of Amplitude is taken into account 23 | % 17/Dec./2000 display bug fix 24 | % 19/Sep./2002 bug fix (mu information was discarded.) 
25 | % 07/Dec./2002 waitbar was added 26 | % 30/April/2005 modification for Matlab v7.0 compatibility 27 | % 10/Aug./2005 modified by Takahashi on waitbar 28 | % 10/Sept./2005 mofidied by Kawahara on waitbar 29 | % 11/Sept./2005 fixed waitbar problem 30 | 31 | %f0floor=40; 32 | %nvo=12; 33 | %nvc=52; 34 | %mu=1.1; 35 | 36 | x=cleaninglownoise(x,fs,f0floor); 37 | 38 | fxx=f0floor*2.0.^((0:nvc-1)/nvo)'; 39 | fxh=max(fxx); 40 | 41 | dn=max(1,floor(fs/(fxh*6.3))); 42 | 43 | if nc>2 44 | pm3=multanalytFineCSPB(decimate(x,dn),fs/dn,f0floor,nvc,nvo,mu,3,imgi); % error crrect 2002.9.19 (mu was fixed 1.1) 45 | pif3=zwvlt2ifq(pm3,fs/dn); 46 | [~,mm]=size(pif3); 47 | pif3=pif3(:,1:3:mm); 48 | pm3=pm3(:,1:3:mm); 49 | end; 50 | 51 | if nc>1 52 | pm2=multanalytFineCSPB(decimate(x,dn),fs/dn,f0floor,nvc,nvo,mu,2,imgi);% error crrect 2002.9.19(mu was fixed 1.1) 53 | pif2=zwvlt2ifq(pm2,fs/dn); 54 | [~,mm]=size(pif2); 55 | pif2=pif2(:,1:3:mm); 56 | pm2=pm2(:,1:3:mm); 57 | end; 58 | 59 | pm1=multanalytFineCSPB(decimate(x,dn*3),fs/(dn*3),f0floor,nvc,nvo,mu,1,imgi);% error crrect 2002.9.19(mu was fixed 1.1) 60 | %%%% safe guard added on 15/Jan./2003 61 | mxpm1=max(max(abs(pm1))); 62 | eeps=mxpm1/10000000; 63 | pm1(pm1==0)=pm1(pm1==0)+eeps; 64 | %%%% safe guard end 65 | pif1=zwvlt2ifq(pm1,fs/(dn*3)); 66 | %keyboard; 67 | 68 | [~,mm1]=size(pif1); 69 | mm=mm1; 70 | if nc>1 71 | [~,mm2]=size(pif2); 72 | mm=min(mm1,mm2); 73 | end; 74 | 75 | if nc>2 76 | [~,mm3]=size(pif3); 77 | mm=min([mm1 mm2 mm3]); 78 | end; 79 | 80 | if nc == 2 81 | for ii=1:mm 82 | pif2(:,ii)=(pif1(:,ii).*(abs(pm1(:,ii))).^pc ... 83 | +pif2(:,ii)/2.*(abs(pm2(:,ii))).^pc )... 84 | ./((abs(pm1(:,ii))).^pc+(abs(pm2(:,ii))).^pc); 85 | end; 86 | end; 87 | if nc == 3 88 | for ii=1:mm 89 | pif2(:,ii)=(pif1(:,ii).*(abs(pm1(:,ii))).^pc ... 90 | +pif2(:,ii)/2.*(abs(pm2(:,ii))).^pc ... 91 | +pif3(:,ii)/3.*(abs(pm3(:,ii))).^pc )... 
92 | ./((abs(pm1(:,ii))).^pc+(abs(pm2(:,ii))).^pc+(abs(pm3(:,ii))).^pc); 93 | end; 94 | end; 95 | if nc == 1 96 | pif2=pif1; 97 | end; 98 | 99 | 100 | %pif2=zwvlt2ifq(pm,fs/dn)*2*pi; 101 | pif2=pif2*2*pi; 102 | dn=dn*3; 103 | 104 | [slp,~]=zifq2gpm2(pif2,f0floor,nvo); 105 | [nn,mm]=size(pif2); 106 | dpif=(pif2(:,2:mm)-pif2(:,1:mm-1))*fs/dn; 107 | dpif(:,mm)=dpif(:,mm-1); 108 | [dslp,~]=zifq2gpm2(dpif,f0floor,nvo); 109 | 110 | damp=(abs(pm1(:,2:mm))-abs(pm1(:,1:mm-1)))*fs/dn; 111 | damp(:,mm)=damp(:,mm-1); 112 | damp=damp./abs(pm1); 113 | 114 | %[c1,c2]=znormwght(1000); 115 | fxx=f0floor*2.0.^((0:nn-1)/nvo)'*2*pi; 116 | mmp=0*dslp; 117 | [c1,c2b]=znrmlcf2(1); 118 | if imgi==1; hpg=waitbar(0,'P/N map calculation'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 119 | for ii=1:nn 120 | % [c1,c2]=znrmlcf2(fxx(ii)/2/pi); % This is OK, but the next Eq is much faster. 121 | c2=c2b*(fxx(ii)/2/pi)^2; 122 | cff=damp(ii,:)/fxx(ii)*2*pi*0; 123 | mmp(ii,:)=(dslp(ii,:)./(1+cff.^2)/sqrt(c2)).^2+(slp(ii,:)./sqrt(1+cff.^2)/sqrt(c1)).^2; 124 | if imgi==1; waitbar(ii/nn); end; %,hpg); % 07/Dec./2002 by H.K.%10/Aug./2005 125 | end; 126 | if imgi==1; close(hpg); end;%10/Aug./2005 127 | 128 | if smp~=0 129 | smap=zsmoothmapB(mmp,fs/dn,f0floor,nvo,smp,minm,0.4); 130 | else 131 | smap=mmp; 132 | end; 133 | 134 | fixpp=zeros(round(nn/3),mm); 135 | fixvv=fixpp+100000000; 136 | fixdf=fixpp+100000000; 137 | fixav=fixpp+1000000000; 138 | nf=zeros(1,mm); 139 | if imgi==1; hpg=waitbar(0,'Fixed pints calculation'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 140 | for ii=1:mm 141 | [ff,vv,df,aa]=zfixpfreq3(fxx,pif2(:,ii),smap(:,ii),dpif(:,ii)/2/pi,pm1(:,ii)); 142 | kk=length(ff); 143 | fixpp(1:kk,ii)=ff; 144 | fixvv(1:kk,ii)=vv; 145 | fixdf(1:kk,ii)=df; 146 | fixav(1:kk,ii)=aa; 147 | nf(ii)=kk; 148 | if imgi==1 && rem(ii,10)==0; waitbar(ii/mm); end;% 07/Dec./2002 by H.K.%10/Aug./2005 149 | end; 150 | if imgi==1; close(hpg); end; % 07/Dec./2002 by H.K.%10/Aug./2005 151 | 
fixpp(fixpp==0)=fixpp(fixpp==0)+1000000; 152 | 153 | %keyboard 154 | %[vvm,ivv]=min(fixvv); 155 | % 156 | %for ii=1:mm 157 | % ff00(ii)=fixpp(ivv(ii),ii); 158 | % esgm(ii)=fixvv(ivv(ii),ii); 159 | %end; 160 | np=max(nf); 161 | f0v=fixpp(1:np,round(1:shiftm/dn*fs/1000:mm))/2/pi; 162 | vrv=fixvv(1:np,round(1:shiftm/dn*fs/1000:mm)); 163 | dfv=fixdf(1:np,round(1:shiftm/dn*fs/1000:mm)); 164 | aav=fixav(1:np,round(1:shiftm/dn*fs/1000:mm)); 165 | nf=nf(round(1:shiftm/dn*fs/1000:mm)); 166 | 167 | if imgi==1 168 | cnmap(fixpp,smap,fs,dn,nvo,f0floor,shiftm); 169 | end; 170 | %ff00=ff00(round(1:shiftm/dn*fs/1000:mm)); 171 | %esgm=sqrt(esgm(round(1:shiftm/dn*fs/1000:mm))); 172 | %keyboard; 173 | 174 | return; 175 | %------------------------------------------------------------------ 176 | function okid=cnmap(fixpp,smap,fs,dn,nvo,f0floor,shiftm) 177 | 178 | % This function had a bug in map axis. 179 | % 17/Dec./2000 bug fix by Hideki Kawahara. 180 | 181 | dt=dn/fs; 182 | [nn,mm]=size(smap); 183 | aa=figure; 184 | set(aa,'PaperPosition',[0.3 0.25 8 10.9]); 185 | set(aa,'Position',[30 130 520 680]); 186 | subplot(211); 187 | imagesc([0 (mm-1)*dt*1000],[1 nn],20*log10(smap(:,round(1:shiftm/dn*fs/1000:mm))));axis('xy') 188 | hold on; 189 | tx=((1:shiftm/dn*fs/1000:mm)-1)*dt*1000; 190 | plot(tx,(nvo*log(fixpp(:,round(1:shiftm/dn*fs/1000:mm))/f0floor/2/pi)/log(2)+0.5)','ko'); 191 | plot(tx,(nvo*log(fixpp(:,round(1:shiftm/dn*fs/1000:mm))/f0floor/2/pi)/log(2)+0.5)','w.'); 192 | hold off 193 | xlabel('time (ms)'); 194 | ylabel('channel #'); 195 | colormap(jet); 196 | 197 | okid=1; 198 | return; 199 | 200 | %------------------------------------------------------------------ 201 | 202 | %%function pm=zmultanalytFineCSPm(x,fs,f0floor,nvc,nvo,mu,mlt); 203 | 204 | % Dual waveleta analysis using cardinal spline manipulation 205 | % pm=multanalytFineCSP(x,fs,f0floor,nvc,nvo); 206 | % Input parameters 207 | % 208 | % x : input signal (2kHz sampling rate is sufficient.) 
% fs : sampling frequency (Hz)
% f0floor : lower bound for pitch search (60Hz suggested)
% nvc : number of total voices for wavelet analysis
% nvo : number of voices in an octave
% mu : temporal stretch factor
% Output parameters
% pm : wavelet transform using iso-metric Gabor function
%
% If you have any questions, mailto:kawahara@hip.atr.co.jp
%
% Copyright (c) ATR Human Information Processing Research Labs. 1996
% Invented and coded by Hideki Kawahara
% 30/Oct./1996

%t0=1/f0floor;
%lmx=round(6*t0*fs*mu);
%wl=2^ceil(log(lmx)/log(2));
%x=x(:)';
%nx=length(x);
%tx=[x,zeros(1,wl)];
%gent=((1:wl)-wl/2)/fs;

%nvc=18;

%wd=zeros(nvc,wl);
%wd2=zeros(nvc,wl);
%ym=zeros(nvc,nx);
%pm=zeros(nvc,nx);
%mpv=1;
%mu=1.0;
%for ii=1:nvc
% t=gent*mpv;
% t=t(abs(t)<3.5*mu*t0);
% wbias=round((length(t)-1)/2);
% wd1=exp(-pi*(t/t0/mu).^2);%.*exp(i*2*pi*t/t0);
% wd2=max(0,1-abs(t/t0/mu));
% wd2=wd2(wd2>0);
% wwd=conv(wd2,wd1);
% wwd=wwd(abs(wwd)>0.0001);
% wbias=round((length(wwd)-1)/2);
% wwd=wwd.*exp(i*2*pi*mlt*t(round((1:length(wwd))-wbias+length(t)/2))/t0);
% pmtmp1=fftfilt(wwd,tx);
% pm(ii,:)=pmtmp1(wbias+1:wbias+nx)*sqrt(mpv);
% mpv=mpv*(2.0^(1/nvo));
% keyboard;
%end;
%[nn,mm]=size(pm);
%pm=pm(:,1:mlt:mm);

%----------------------------------------------------------------
function pif=zwvlt2ifq(pm,fs)
% Wavelet to instantaneous frequency map
% pif=zwvlt2ifq(pm,fs)
% pm : complex wavelet transform, one row per channel, one column per sample
% fs : sampling frequency of pm (Hz)
% pif : map of the same size as pm, computed from the phase advance
%       between successive columns and scaled by fs

% Coded by Hideki Kawahara
% 02/March/1999

[~,mm]=size(pm);
pm=pm./(abs(pm)); % keep phase only; NOTE(review): an exact zero in pm yields NaN here
pif=abs(pm(:,:)-[pm(:,1),pm(:,1:mm-1)]); % chord length between successive unit phasors
pif=fs/pi*asin(pif/2); % chord length -> half the phase step, scaled by fs/pi
pif(:,1)=pif(:,2); % the first column has no predecessor; copy the second one

%----------------------------------------------------------------

function [slp,pbl]=zifq2gpm2(pif,f0floor,nvo)
% Instantaneous frequency 2 geometric parameters
% [slp,pbl]=zifq2gpm2(pif,f0floor,nvo)
% pif : instantaneous frequency map, one row per channel
% f0floor : frequency of the lowest channel (Hz)
% nvo : number of channels per octave
% slp : first order coefficient
% pbl : second order coefficient
%
% Both outputs are divided by the angular center frequency of their channel.

% Coded by Hideki Kawahara
% 02/March/1999

[nn,~]=size(pif);
fx=f0floor*2.0.^((0:nn-1)/nvo)*2*pi; % angular center frequency of each channel

c=2.0^(1/nvo); % frequency ratio between adjacent channels
g=[1/c/c 1/c 1;1 1 1;c*c c 1];
h=inv(g);

%slp=pif(1:nn-2,:)*h(1,1)+pif(2:nn-1,:)*h(1,2)+pif(3:nn,:)*h(1,3);
% mean of the backward and forward differences across neighbouring channels
slp=((pif(2:nn-1,:)-pif(1:nn-2,:))/(1-1/c) ...
    +(pif(3:nn,:)-pif(2:nn-1,:))/(c-1))/2;
slp=[slp(1,:);slp;slp(nn-2,:)]; % replicate the edge rows to restore nn rows

% three-channel combination weighted by the second row of inv(g)
pbl=pif(1:nn-2,:)*h(2,1)+pif(2:nn-1,:)*h(2,2)+pif(3:nn,:)*h(2,3);
pbl=[pbl(1,:);pbl;pbl(nn-2,:)];

for ii=1:nn
    slp(ii,:)=slp(ii,:)/fx(ii);
    pbl(ii,:)=pbl(ii,:)/fx(ii);
end;

%------------------------------------------

%function [c1,c2]=znormwght(n)

%zz=0:1/n:3;
%hh=[diff(zGcBs(zz,0)) 0]*n;
%c1=sum((zz.*hh).^2)/n;
%c2=sum((2*pi*zz.^2.*hh).^2)/n;

%-------------------------------------------

function p=zGcBs(x,k)
% Evaluate tt.^k .* exp(-pi*tt.^2) .* (sin(pi*tt)./(pi*tt)).^2 at tt = x.
% Small offsets (1e-7 on x, 1e-4 inside the sin ratio) avoid 0/0 at x = 0.

tt=x+0.0000001;
p=tt.^k.*exp(-pi*tt.^2).*(sin(pi*tt+0.0001)./(pi*tt+0.0001)).^2;


%--------------------------------------------
function smap=zsmoothmapB(map,fs,f0floor,nvo,mu,mlim,pex)
% Smooth each channel of a map along time with two Gaussian windows.
% smap=zsmoothmapB(map,fs,f0floor,nvo,mu,mlim,pex)
% map : input map, one row per channel, one column per frame
% fs : sampling frequency of the map columns (Hz)
% f0floor : frequency of the lowest channel (Hz)
% nvo : number of channels per octave
% mu : smoothing length relative to the channel period (ratio)
% mlim : lower limit of the smoothing length (ms); the window stops
%        shrinking once t0*mu/mpv*1000 is no longer above this value
% pex : relative width offset; the two windows use (1-pex) and (1+pex)
%
% Each row is filtered with the first window, its reciprocal is filtered
% with the second window, and the reciprocal of that result is returned.

[nvc,mm]=size(map);
%mu=0.4;
t0=1/f0floor;
lmx=round(6*t0*fs*mu);
wl=2^ceil(log(lmx)/log(2));
gent=((1:wl)-wl/2)/fs; % generic time axis, rescaled per channel by mpv

smap=map;
mpv=1;
zt=0*gent; % zero padding appended before each fftfilt call
iiv=1:mm;
for ii=1:nvc
    t=gent*mpv; %t0*mu/mpv*1000
    t=t(abs(t)<3.5*mu*t0);
    wbias=round((length(t)-1)/2); % filter delay to compensate (samples)
    wd1=exp(-pi*(t/(t0*(1-pex))/mu).^2);
    wd2=exp(-pi*(t/(t0*(1+pex))/mu).^2);
    wd1=wd1/sum(wd1);
    wd2=wd2/sum(wd2);
    tm=fftfilt(wd1,[map(ii,:) zt]);
    tm=fftfilt(wd2,[1.0./tm(iiv+wbias) zt]);
    smap(ii,:)=1.0./tm(iiv+wbias);
    if t0*mu/mpv*1000 > mlim
        mpv=mpv*(2.0^(1/nvo));
    end;
end;

%--------------------------------------------
%function [ff,vv,df]=zfixpfreq2(fxx,pif2,mmp,dfv)
%
%nn=length(fxx);
%iix=(1:nn)';
%cd1=pif2-fxx;
%cd2=[diff(cd1);cd1(nn)-cd1(nn-1)];
%cdd1=[cd1(2:nn);cd1(nn)];
%fp=(cd1.*cdd1<0).*(cd2<0);
%ixx=iix(fp>0);
%ff=pif2(ixx)+(pif2(ixx+1)-pif2(ixx)).*cd1(ixx)./(cd1(ixx)-cdd1(ixx));
%vv=mmp(ixx);
%vv=mmp(ixx)+(mmp(ixx+1)-mmp(ixx)).*(ff-fxx(ixx))./(fxx(ixx+1)-fxx(ixx));
%df=dfv(ixx)+(dfv(ixx+1)-dfv(ixx)).*(ff-fxx(ixx))./(fxx(ixx+1)-fxx(ixx));

%--------------------------------------------
function [ff,vv,df,aa]=zfixpfreq3(fxx,pif2,mmp,dfv,pm)
% Locate fixed points of the channel-frequency to output-frequency mapping.
% [ff,vv,df,aa]=zfixpfreq3(fxx,pif2,mmp,dfv,pm)
% fxx : channel center frequencies (column vector)
% pif2 : instantaneous frequency of each channel for one frame
% mmp, dfv : per-channel values to be read out at the fixed points
% pm : complex channel outputs for the frame; abs(pm) is read out as aa
% ff : interpolated frequencies where (pif2-fxx) crosses zero downward
% vv, df, aa : mmp, dfv and abs(pm) linearly interpolated at ff

aav=abs(pm);
nn=length(fxx);
iix=(1:nn)';
cd1=pif2-fxx;
cd2=[diff(cd1);cd1(nn)-cd1(nn-1)];
cdd1=[cd1(2:nn);cd1(nn)]; % cd1 shifted by one channel
fp=(cd1.*cdd1<0).*(cd2<0); % sign change between neighbours with cd1 decreasing
ixx=iix(fp>0);
ff=pif2(ixx)+(pif2(ixx+1)-pif2(ixx)).*cd1(ixx)./(cd1(ixx)-cdd1(ixx));
%vv=mmp(ixx);
vv=mmp(ixx)+(mmp(ixx+1)-mmp(ixx)).*(ff-fxx(ixx))./(fxx(ixx+1)-fxx(ixx));
df=dfv(ixx)+(dfv(ixx+1)-dfv(ixx)).*(ff-fxx(ixx))./(fxx(ixx+1)-fxx(ixx));
aa=aav(ixx)+(aav(ixx+1)-aav(ixx)).*(ff-fxx(ixx))./(fxx(ixx+1)-fxx(ixx));

%--------------------------------------------
function [c1,c2]=znrmlcf2(f)
% Normalization constants computed from the numerical derivative of
% zGcBs(x,0) sampled on x = 0:0.01:3.
% f : frequency used to scale the abscissa and the derivative
% c1, c2 : sums of (xx.*dgs).^2 and (xx.^2.*dgs).^2, scaled by 2/n

n=100;
x=0:1/n:3;
g=zGcBs(x,0);
dg=[diff(g) 0]*n; % finite-difference derivative with respect to x
dgs=dg/2/pi/f;
xx=2*pi*f*x;
c1=sum((xx.*dgs).^2)/n*2;
c2=sum((xx.^2.*dgs).^2)/n*2;

%--------------------------------------------
function x=cleaninglownoise(x,fs,f0floor)
% Remove the components of x below f0floor.
% x=cleaninglownoise(x,fs,f0floor)
% x : input signal; the result is returned as a row vector of equal length
% fs : sampling frequency (Hz)
% f0floor : cut-off frequency (Hz)
%
% A low-pass FIR filter designed by fir1 is turned into its complement
% (unit impulse minus low-pass) and applied with its delay compensated.

flm=50; % half length of the filter (ms)
flp=round(fs*flm/1000);
nn=length(x);
wlp=fir1(flp*2,f0floor/(fs/2));
wlp(flp+1)=wlp(flp+1)-1; % low-pass minus the unit impulse at the center tap ...
wlp=-wlp;                % ... negated: unit impulse minus low-pass

tx=[x(:)' zeros(1,2*length(wlp))];
ttx=fftfilt(wlp,tx);
x=ttx((1:nn)+flp); % drop the filter delay of flp samples

return;


--------------------------------------------------------------------------------
/src/fractpitch2.m:
--------------------------------------------------------------------------------
function phs=fractpitch2(fftl)
% Phase rotator for fractional pitch
% This program produces 'phs' as the phase rotator.
% phs=fractpitch2(fftl)
% fftl : number of points
% phs : row vector of length fftl (radians), evaluated on an axis that
%       runs from -1 to 1-2/fftl; the first element is forced to 0

% by Hideki Kawahara
% 22/August/1996

amp=15; % steepness of the sigmoid-shaped term
t=((1:fftl)-fftl/2-1)/fftl*2;
phs=t+(1-exp(amp*t))./(1+exp(amp*t)) ...
    -(1+(1-exp(amp))/(1+exp(amp)))*t;
phs(1)=0;
phs=phs*pi;
--------------------------------------------------------------------------------
/src/gdmap.m:
--------------------------------------------------------------------------------
function gdm=gdmap(n3sgram,fs)
% gdm=gdmap(n3sgram,fs)
% function to calculate group delay map from
% smoothed time frequency representation
% n3sgram : one-sided spectrogram, (fftl/2+1) rows, one column per frame
% fs : sampling frequency (Hz)
% gdm : group delay map of the same size as n3sgram

% Designed and coded by Hideki Kawahara
% 7/Sept./2003

[nn,mm]=size(n3sgram);
fftl=(nn-1)*2;

rbb2=fftl/2:-1:2; % rows mirrored to build the full-length spectrum
gdm=zeros(nn,mm);
for ii=1:mm
    ff=[n3sgram(:,ii);n3sgram(rbb2,ii)];
    ccp=real(fft(log(ff)));
    % fold onto the first half: keep bin 1, double bins 2..fftl/2, zero the rest
    ccp2=[ccp(1);2*ccp(2:fftl/2);0*ccp(fftl/2+1:fftl)];
    ffx=(-ifft(ccp2));
    % negative frequency difference of the imaginary part per bin spacing
    gdt=-diff(imag(ffx)/(2*pi*fs/fftl));
    gdm(:,ii)=[gdt(1);gdt(1:fftl/2)];
end;
--------------------------------------------------------------------------------
/src/getvalufromedit.m:
--------------------------------------------------------------------------------
function y=getvalufromedit(co,defv)
% Read a single numeric value from the 'String' property of a GUI object.
% y=getvalufromedit(co,defv)
% co : unused; the string is read from the current object (gco)
% defv : value returned when the string does not yield exactly one number
% y : the number converted from the string, or defv
%
% NOTE(review): the handle actually queried is gco, not the argument co.
% This is kept as is because callers may rely on it - confirm before
% switching to co.
% NOTE(review): str2num evaluates its argument, so any expression typed in
% the edit box is executed. Kept for compatibility with inputs like '1/3'.

ss=get(gco,'String');
y=str2num(ss);
if numel(y) ~= 1 % empty (conversion failed) or more than one value
    y=defv;
end;
--------------------------------------------------------------------------------
/src/isOctave.m:
--------------------------------------------------------------------------------
function output = isOctave
% Return true when running under GNU Octave, false otherwise.
% output = isOctave
%
% OCTAVE_VERSION is a built-in function that exists only in Octave, so
% the test does not depend on the ordering of the list returned by ver
% and avoids the cost of calling ver.
output = exist('OCTAVE_VERSION', 'builtin') ~= 0;
end
--------------------------------------------------------------------------------
/src/mktstr.m:
--------------------------------------------------------------------------------
function tstr=mktstr
% return time string in hh:mm:ss format
% tstr=mktstr
% tstr : current local time; every field is zero-padded to two digits
%        (for example '09:05:03')
% by Hideki Kawahara
% 05/Jan./1995

anatime=fix(clock);
% %02d keeps the documented hh:mm:ss layout for single-digit fields
tstr=sprintf('%02d:%02d:%02d',anatime(4),anatime(5),anatime(6));
--------------------------------------------------------------------------------
/src/multanalytFineCSPB.m:
--------------------------------------------------------------------------------
function pm=multanalytFineCSPB(x,fs,f0floor,nvc,nvo,mu,mlt,imgi)

% Dual wavelet analysis using cardinal spline manipulation
% pm=multanalytFineCSPB(x,fs,f0floor,nvc,nvo,mu,mlt,imgi)
% Input parameters
%
% x : input signal (2kHz sampling rate is sufficient.)
% fs : sampling frequency (Hz)
% f0floor : lower bound for pitch search (60Hz suggested)
% nvc : number of total voices for wavelet analysis
% nvo : number of voices in an octave
% mu : temporal stretch factor
% mlt : harmonic ID#
% imgi : display indicator, 1: display on (default), 0: display off
% Output parameters
% pm : wavelet transform using iso-metric Gabor function
%      (nvc rows, one column per input sample)
%
% If you have any questions, mailto:kawahara@hip.atr.co.jp
%
% Copyright (c) ATR Human Information Processing Research Labs. 1996
% Invented and coded by Hideki Kawahara
% 30/Oct./1996
% 07/Dec./2002 waitbar was added
% 10/Aug./2005 modified by Takahashi on waitbar
% 10/Sept./2005 modified by Kawahara on waitbar

if nargin==7; imgi=1; end;%10/Sept./2005
t0=1/f0floor;
lmx=round(6*t0*fs*mu);
wl=2^ceil(log(lmx)/log(2));
x=x(:)';
nx=length(x);
tx=[x,zeros(1,wl)]; % zero padding so that the filter delay can be removed
gent=((1:wl)-wl/2)/fs; % generic time axis, rescaled per channel by mpv

pm=zeros(nvc,nx);
mpv=1;
if imgi==1; hpg=waitbar(0,['wavelet analysis for initial F0 ' ...
        'and P/N estimation with HM#:' num2str(mlt)]); end; % 07/Dec./2002 by H.K.%10/Aug./2005
for ii=1:nvc
    tb=gent*mpv;
    t=tb(abs(tb)<3.5*mu*t0);
    wd1=exp(-pi*(t/t0/mu).^2);     % Gaussian envelope
    wd2=max(0,1-abs(t/t0/mu));     % triangular window
    wd2=wd2(wd2>0);
    wwd=conv(wd2,wd1);
    wwd=wwd(abs(wwd)>0.00001);
    wbias=round((length(wwd)-1)/2); % filter delay to compensate (samples)
    % complex carrier at mlt times the channel frequency
    wwd=wwd.*exp(1i*2*pi*mlt*t(round((1:length(wwd))-wbias+length(t)/2))/t0);
    pmtmp1=fftfilt(wwd,tx);
    pm(ii,:)=pmtmp1(wbias+1:wbias+nx)*sqrt(mpv);
    mpv=mpv*(2.0^(1/nvo)); % next channel is 1/nvo octave higher
    if imgi==1; waitbar(ii/nvc); end; %,hpg);% 07/Dec./2002 by H.K.%10/Aug./2005
end;
if imgi==1; close(hpg); end; % 07/Dec./2002 by H.K.%10/Aug./2005
--------------------------------------------------------------------------------
/src/optimumsmoothing.m:
--------------------------------------------------------------------------------
function ovc=optimumsmoothing(eta,pc)
% ovc=optimumsmoothing(eta,pc)
% Calculate the optimum smoothing function
% ovc : coefficients for 2nd order cardinal B-spline
% eta : temporal stretch factor
% pc : power exponent for nonlinearity

% 05/July/2006 dirty patch. This routine has to be re-programmed.

fx=-8:0.05:8;
cb=max(0,1-abs(fx));
gw=exp(-pi*(fx*eta*1.4).^2).^pc;
cmw=conv(cb,gw);
bb=(1:length(cb));
bbc=bb+(length(cb)-1)/2;
cmw=cmw(bbc)/max(cmw);
ss=(abs(fx-round(fx))<0.025).*(1:length(cb));
ss=ss(ss>0);
cmws=cmw(ss);

nn=length(cmws);
idv=1:nn;

hh=zeros(2*nn,nn);
for ii=1:nn
    hh((ii-1)+idv,ii)=cmws';
end;
bv=zeros(2*nn,1);
bv(nn+1)=1; % This is the original unit impulse.
30 | h=hh'*hh; 31 | ov = h \ (hh'*bv); 32 | 33 | idc=(nn-1)/2+2; 34 | 35 | ovc=ov(idc+(0:3)); 36 | -------------------------------------------------------------------------------- /src/plotcpower.m: -------------------------------------------------------------------------------- 1 | function [pw,pwh]=plotcpower(x,fs,shiftm) 2 | % 30/April/2005 modification for Matlab v7.0 compatibility 3 | 4 | flm=8; % originally; 01/August/1999 . 5 | fl=round(flm*fs/1000); 6 | w=hanning(2*fl+1); 7 | w=w/sum(w); 8 | nn=length(x); 9 | 10 | flpm=40; 11 | flp=round(flpm*fs/1000); 12 | wlp=fir1(flp*2,70/(fs/2)); 13 | wlp(flp+1)=wlp(flp+1)-1; 14 | wlp=-wlp; 15 | 16 | tx=[x(:)' zeros(1,2*length(wlp))]; 17 | ttx=fftfilt(wlp,tx); 18 | ttx=ttx((1:nn)+flp); 19 | tx=[ttx(:)' zeros(1,2*length(w))]; 20 | 21 | pw=fftfilt(w,tx.^2); 22 | pw=pw((1:nn)+fl); 23 | mpw=max(pw); 24 | pw=pw(round(1:shiftm*fs/1000:nn)); 25 | pw(pw3kHz) '); 46 | end 47 | -------------------------------------------------------------------------------- /src/powerchk.m: -------------------------------------------------------------------------------- 1 | function pow=powerchk(x,fs,segms) 2 | % Calculate average power of voiced portion 3 | % pow=powerchk(x,fs,segms) 4 | % x : signal 5 | % fs : sampling frequency (Hz) 6 | % segms : segment length (ms) 7 | 8 | % 23/Sept./1999 updated 9 | % 30/April/2005 modification for Matlab v7.0 compatibility 10 | 11 | x1=x(:); 12 | iv=(1:length(x1))'; 13 | x1(isnan(x1))=iv(isnan(x1))*0+0.0000000001; 14 | x2=x1.*x1; 15 | n=round(segms/1000*fs); % 23/Sept./1999 16 | nw=ceil(length(x)/n); 17 | if rem(length(x),n)>0 18 | x2=[x2;0.000001*randn(n*nw-length(x),1).^2]; % 23/Sept./1999 19 | end; 20 | x2(x2==0)=x2(x2==0)+0.000001; 21 | 22 | pw=sum(reshape(x2,n,nw))/n; 23 | 24 | pow=10*log10(mean(pw(pw>(mean(pw)/30)))); 25 | 26 | -------------------------------------------------------------------------------- /src/refineF06.m: 
-------------------------------------------------------------------------------- 1 | function [f0r,ecr]=refineF06(x,fs,f0raw,fftl,eta,nhmx,shiftm,nl,nu,imgi) 2 | % F0 estimation refinement 3 | % [f0r,ecr]=refineF06(x,fs,f0raw,fftl,nhmx,shiftm,nl,nu,imgi) 4 | % x : input waveform 5 | % fs : sampling frequency (Hz) 6 | % f0raw : F0 candidate (Hz) 7 | % fftl : FFT length 8 | % eta : temporal stretch factor 9 | % nhmx : highest harmonic number 10 | % shiftm : frame shift period (ms) 11 | % nl : lower frame number 12 | % nu : uppter frame number 13 | % imgi : display indicator, 1: display on (default), 0: off 14 | % 15 | % Example of usage (with STRAIGHT) 16 | % 17 | % global xold fs f0shiftm f0raw 18 | % 19 | % dn=floor(fs/(800*3*2)); 20 | % [f0raw,ecr]=refineF02(decimate(xold,dn),fs/dn,f0raw,512,1.1,3,f0shiftm,1,length(f0raw)); 21 | 22 | % Designed and coded by Hideki Kawahara 23 | % 28/July/1999 24 | % 29/July/1999 test version using power weighting 25 | % 30/July/1999 GcBs is added (bug fix) 26 | % 07/August/1999 small bug fix 27 | % 07/Dec./2002 wqitbar was added 28 | % 13.May/2005 minor vulnerability fix 29 | % 10/Aug./2005 modified by Takahashi on waitbar 30 | % 10/Sept./2005 modified by Kawahara on waitbar 31 | % 16/Sept./2005 minor bug fix 32 | % 26/Sept./2005 bug fix 33 | 34 | if nargin==9; imgi=1; end; 35 | f0i=f0raw(:); 36 | f0i(f0i==0)=f0i(f0i==0)+160; 37 | fax=(0:fftl-1)/fftl*fs; 38 | nfr=length(f0i); % 07/August/1999 39 | 40 | shiftl=shiftm/1000*fs; 41 | x=[zeros(fftl,1); x(:) ; zeros(fftl,1)]'; 42 | 43 | tt=((1:fftl)-fftl/2)/fs; 44 | th=(0:fftl-1)/fftl*2*pi; 45 | rr=exp(-1i*th); 46 | 47 | f0t=100; 48 | w1=max(0,1-abs(tt'*f0t/eta)); 49 | w1=w1(w1>0); 50 | wg=exp(-pi*(tt*f0t/eta).^2); 51 | wgg=(wg(abs(wg)>0.0002)); 52 | wo=fftfilt(wgg,[w1; zeros(length(wgg),1)])'; 53 | 54 | xo=(0:length(wo)-1)/(length(wo)-1); 55 | nlo=length(wo)-1; 56 | 57 | if nl*nu <0 58 | nl=1; 59 | nu=nfr; 60 | end; 61 | 62 | bx=1:fftl/2+1; 63 | pif=zeros(fftl/2+1,nfr); 64 | 
dpif=zeros(fftl/2+1,nfr); 65 | pwm=zeros(fftl/2+1,nfr); 66 | rmsValue = std(x); % 26/Sept./2005 by HK 67 | 68 | if imgi==1; hpg=waitbar(0,'F0 refinement using F0 adaptive analysis'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 69 | for kk=nl:nu 70 | if f0i(kk) < 40 71 | f0i(kk)=40; 72 | end; 73 | f0t=f0i(kk); 74 | xi=0:1/nlo*f0t/100:1; 75 | wa=interp1(xo,wo,xi,'*linear'); 76 | wal=length(wa); 77 | bb=1:wal; 78 | bias=round(fftl-wal/2+(kk-1)*shiftl); 79 | if std(x(bb+bias))*std(x(bb+bias-1))*std(x(bb+bias+1)) == 0 % 26/Sept./2005 by HK 80 | x(bb+bias) = randn(length(bb),1)*rmsValue/100000; 81 | end; 82 | dcl=mean(x(bb+bias)); 83 | ff0=fft((x(bb+bias-1)-dcl).*wa,fftl); 84 | ff1=fft((x(bb+bias)-dcl).*wa,fftl); 85 | ff2=fft((x(bb+bias+1)-dcl).*wa,fftl); 86 | fd=ff2.*rr-ff1; 87 | fd0=ff1.*rr-ff0; 88 | crf=fax+(real(ff1).*imag(fd)-imag(ff1).*real(fd))./(abs(ff1).^2)*fs/pi/2; 89 | crf0=fax+(real(ff0).*imag(fd0)-imag(ff0).*real(fd0))./(abs(ff0).^2)*fs/pi/2; 90 | pif(:,kk)=crf(bx)'*2*pi; 91 | dpif(:,kk)=(crf(bx)-crf0(bx))'*2*pi; 92 | pwm(:,kk)=abs(ff1(bx)'); % 29/July/1999 93 | if imgi==1; waitbar((kk-nl)/(nu-nl)); end; % ,hpg) % 07/Dec./2002 by H.K.%10/Aug./2005 94 | end; 95 | if imgi==1; close(hpg); end; 96 | slp=([pif(2:fftl/2+1,:);pif(fftl/2+1,:)]-pif)/(fs/fftl*2*pi); 97 | dslp=([dpif(2:fftl/2+1,:);dpif(fftl/2+1,:)]-dpif)/(fs/fftl*2*pi)*fs; 98 | mmp=slp*0; 99 | 100 | [c1,c2]=znrmlcf2(shiftm); 101 | fxx=((0:fftl/2)+0.5)/fftl*fs*2*pi; 102 | 103 | %--- calculation of relative noise level 104 | 105 | if imgi==1; hpg=waitbar(0,'P/N calculation'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 106 | for ii=1:fftl/2+1; 107 | c2=c2*(fxx(ii)/2/pi)^2; 108 | mmp(ii,:)=(dslp(ii,:)/sqrt(c2)).^2+(slp(ii,:)/sqrt(c1)).^2; 109 | if imgi==1 && rem(ii,10)==0;waitbar(ii/(fftl/2+1));end; % 07/Dec./2002 by H.K.%10/Aug./2005 110 | end; 111 | if imgi==1; close(hpg); end; % 07/Dec./2002 by H.K.%10/Aug./2005 112 | 113 | %--- Temporal smoothing 114 | 115 | sml=round(1.5*fs/1000/2/shiftm)*2+1; % 3 ms, and 
odd number 116 | smb=round((sml-1)/2); % bias due to filtering 117 | 118 | if imgi==1; hpg=waitbar(0,'P/N smoothing'); end; % 07/Dec./2002 by H.K.%10/Aug./2005 119 | %This smoothing is modified (30 Nov. 2000). 120 | smmp=fftfilt((hanning(sml).^2)/sum((hanning(sml).^2)), ... 121 | [mmp zeros(fftl/2+1,sml*2)]'+max(max(mmp((~isnan(mmp))&(mmp0); 149 | ecr=sqrt(1.0./vvvf).*(f0raw(:)'>0)+(f0raw(:)'<=0); 150 | if imgi==1; close(hpg); end;%10/Aug./2005 151 | 152 | %keyboard; 153 | 154 | %-------------------- 155 | function [c1,c2]=znrmlcf2(f) 156 | 157 | n=100; 158 | x=0:1/n:3; 159 | g=GcBs(x,0); 160 | dg=[diff(g) 0]*n; 161 | dgs=dg/2/pi/f; 162 | xx=2*pi*f*x; 163 | c1=sum((xx.*dgs).^2)/n; 164 | c2=sum((xx.^2.*dgs).^2)/n; 165 | 166 | %--------------------- 167 | function p=GcBs(x,k) 168 | 169 | tt=x+0.0000001; 170 | p=tt.^k.*exp(-pi*tt.^2).*(sin(pi*tt+0.0001)./(pi*tt+0.0001)).^2; 171 | 172 | 173 | -------------------------------------------------------------------------------- /src/regressionTestBaseGenerator.m: -------------------------------------------------------------------------------- 1 | %% Regression test data generator 2 | % This program should be executed at the very beginning of refactoring a 3 | % major revision. 
% This is for making legacy STRAIGHT v40_007d to be
% compatible with MATLAB R2015b and Octave

% Copyright(c) 2016, Hideki Kawahara, (kawahara@sys.wakayama-u.ac.jp)

clear all
close all

original_speech_dir = '~/Music/VCTK_CORPUS/VCTK-Corpus/wav48/';
target_analysis_dir = '~/m-file/STRAIGHTV40_007e/analysisData/';
target_wave_dir = '~/m-file/STRAIGHTV40_007e/waveData/';

mkdir(target_analysis_dir);
mkdir(target_wave_dir);

dir_list = dir([original_speech_dir 'p*']); % one directory per speaker

%%

% Count all wave files; used as an upper bound for the statistics table
n_dirs = length(dir_list);
n_files = 0;
for ii = 1:n_dirs
    tmp_files = dir([original_speech_dir dir_list(ii).name '/*.wav']);
    n_files = n_files + length(tmp_files);
end;
%%
n_test = 2; % number of files tested for each speaker
l_segment = 0.1; % 100 ms segment
for ii = 1:n_dirs
    rng(12345); % initialize frozen random number
    seq_id = 0;
    % columns: duration (s), dynamic range (dB), 85th percentile level (dB),
    % peak absolute amplitude
    basic_stat_table = zeros(n_files, 4);
    tmp_files = dir([original_speech_dir dir_list(ii).name '/*.wav']);
    for jj = 1:length(tmp_files)
        [x, fs] = audioread([original_speech_dir dir_list(ii).name '/' ...
            tmp_files(jj).name ]);
        seq_id = seq_id + 1;
        l_in_sample_segment = min(length(x), round(fs * l_segment));
        n_segment = floor(length(x) / l_in_sample_segment);
        rms_level = zeros(n_segment, 1);
        for kk = 1:n_segment
            rms_level(kk) = 20 * ...
                log10(std(x((kk - 1) * l_in_sample_segment + ...
                (1:l_in_sample_segment))));
        end;
        sorted_level = sort(rms_level);
        basic_stat_table(seq_id, 1) = length(x) / fs;
        basic_stat_table(seq_id, 2) = max(rms_level) - min(rms_level);
        basic_stat_table(seq_id, 3) = ...
            sorted_level(round(length(sorted_level) * 0.85));
        basic_stat_table(seq_id, 4) = max(abs(x));
    end;
    basic_stat_table = basic_stat_table(1:seq_id, :);
    % select safe region
    sorted_length = sort(basic_stat_table(:, 1));
    sorted_dynamic_range = sort(basic_stat_table(:, 2));
    sorted_85percent = sort(basic_stat_table(:, 3));
    l_10 = sorted_length(round(seq_id * 0.1));
    l_90 = sorted_length(round(seq_id * 0.9));
    d_10 = sorted_dynamic_range(round(seq_id * 0.1));
    d_90 = sorted_dynamic_range(round(seq_id * 0.9));
    v_10 = sorted_85percent(round(seq_id * 0.1));
    v_90 = sorted_85percent(round(seq_id * 0.9));
    index_list = 1:seq_id;
    % keep files strictly inside the 10-90 percentile range of every
    % statistic and with a peak amplitude below 0.95
    safe_index = index_list( ...
        l_10 < basic_stat_table(:, 1) & l_90 > basic_stat_table(:, 1) & ...
        d_10 < basic_stat_table(:, 2) & d_90 > basic_stat_table(:, 2) & ...
        v_10 < basic_stat_table(:, 3) & v_90 > basic_stat_table(:, 3) & ...
        basic_stat_table(:, 4) < 0.95);
    % Draw the test files at random from ALL safe files. The permutation
    % has to span length(safe_index); permuting only 1:n_test would always
    % pick the first n_test safe files.
    n_safe = length(safe_index);
    n_selected = min(n_test, n_safe); % a speaker may have fewer safe files
    [~, tmp_index] = sort(rand(n_safe, 1));
    selection_index = tmp_index(1:n_selected);
    for kk = 1:n_selected
        id = safe_index(selection_index(kk));
        [x, fs] = audioread([original_speech_dir dir_list(ii).name '/' ...
            tmp_files(id).name ]);
        rng(12345); % initialize frozen random number
        f0raw = MulticueF0v14(x,fs);
        ap = exstraightAPind(x,fs,f0raw);
        n3sgram=exstraightspec(x,f0raw,fs);
        rng(12345); % initialize frozen random number
        y = exstraightsynth(f0raw,n3sgram,ap,fs);
        disp([num2str(kk) ': ' tmp_files(id).name ' at:' datestr(now)]);
        audiowrite([target_wave_dir '/' tmp_files(id).name], ...
87 | y / max(abs(y)) * 0.9, fs); 88 | path_name_f0 = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'f0.bin']; 89 | path_name_ap = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'ap.bin']; 90 | path_name_sp = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'sp.bin']; 91 | WriteBinaryData(path_name_f0, f0raw) 92 | WriteBinaryData(path_name_ap, ap) 93 | WriteBinaryData(path_name_sp, n3sgram) 94 | end; 95 | end; 96 | -------------------------------------------------------------------------------- /src/regressionTestBaseGeneratorR.m: -------------------------------------------------------------------------------- 1 | %% Regression test data generator 2 | % This program should be executed at the very beginning of refactoring a 3 | % major revision. This is for making legacy STRAIGHT v40_007d to be 4 | % compatible with MATLAB R2015b and Octave 5 | % Revised for initialization 6 | 7 | % Copyright(c) 2016, Hideki Kawahara, (kawahara@sys.wakayama-u.ac.jp) 8 | 9 | clear all 10 | close all 11 | 12 | original_speech_dir = '/Users/kawahara/Music/VCTK_CORPUS/VCTK-Corpus/wav48/'; 13 | if isOctave 14 | target_analysis_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/analysisDataO/'; 15 | target_wave_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/waveDataO/'; 16 | else 17 | target_analysis_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/analysisDataR/'; 18 | target_wave_dir = '/Users/kawahara/m-file/STRAIGHTV40_007e/waveDataR/'; 19 | end; 20 | 21 | mkdir(target_analysis_dir); 22 | mkdir(target_wave_dir); 23 | 24 | dir_list = dir([original_speech_dir 'p*']); 25 | 26 | %% 27 | 28 | n_dirs = length(dir_list); 29 | n_files = 0; 30 | for ii = 1:n_dirs 31 | tmp_files = dir([original_speech_dir dir_list(ii).name '/*.wav']); 32 | n_files = n_files + length(tmp_files); 33 | end; 34 | %% 35 | n_test = 2; % number of files tested for each speaker 36 | l_segment = 0.1; % 100 ms segment 37 | command1 = 'rand("seed", 12345);'; % for Octave 38 | command2 = 'randn("seed", 12345);';% 
% for Octave
% Main loop: per speaker folder, gather per-file statistics, keep the files
% inside the 10%-90% range of each statistic, randomly pick n_test of them
% and store their STRAIGHT analysis / synthesis results as the baseline.
for ii = 1:n_dirs
    if isOctave
        eval(command1);
        eval(command2);
    else
        rng(12345); % initialize frozen random number
    end;
    seq_id = 0;
    % columns: 1 duration (s), 2 range of segment levels (dB),
    %          3 85th-percentile segment level (dB), 4 peak |x|
    basic_stat_table = zeros(n_files, 4);
    tmp_files = dir([original_speech_dir dir_list(ii).name '/*.wav']);
    for jj = 1:length(tmp_files)
        [x, fs] = audioread([original_speech_dir dir_list(ii).name '/' ...
            tmp_files(jj).name ]);
        seq_id = seq_id + 1;
        l_in_sample_segment = min(length(x), round(fs * l_segment));
        n_segment = floor(length(x) / l_in_sample_segment);
        rms_level = zeros(n_segment, 1);
        for kk = 1:n_segment
            rms_level(kk) = 20 * ...
                log10(std(x((kk - 1) * l_in_sample_segment + ...
                (1:l_in_sample_segment))));
        end;
        sorted_level = sort(rms_level);
        basic_stat_table(seq_id, 1) = length(x) / fs;
        basic_stat_table(seq_id, 2) = max(rms_level) - min(rms_level);
        basic_stat_table(seq_id, 3) = ...
            sorted_level(round(length(sorted_level) * 0.85));
        basic_stat_table(seq_id, 4) = max(abs(x));
    end;
    if seq_id == 0
        continue; % speaker folder without wav files: nothing to select
    end;
    basic_stat_table = basic_stat_table(1:seq_id, :);
    % select safe region
    sorted_length = sort(basic_stat_table(:, 1));
    sorted_dynamic_range = sort(basic_stat_table(:, 2));
    sorted_85percent = sort(basic_stat_table(:, 3));
    % ranks are clamped to 1 so that folders with fewer than five files do
    % not produce a zero index
    rank_10 = max(1, round(seq_id * 0.1));
    rank_90 = max(1, round(seq_id * 0.9));
    l_10 = sorted_length(rank_10);
    l_90 = sorted_length(rank_90);
    d_10 = sorted_dynamic_range(rank_10);
    d_90 = sorted_dynamic_range(rank_90);
    v_10 = sorted_85percent(rank_10);
    v_90 = sorted_85percent(rank_90);
    index_list = 1:seq_id;
    safe_index = index_list( ...
        l_10 < basic_stat_table(:, 1) & l_90 > basic_stat_table(:, 1) & ...
        d_10 < basic_stat_table(:, 2) & d_90 > basic_stat_table(:, 2) & ...
        v_10 < basic_stat_table(:, 3) & v_90 > basic_stat_table(:, 3) & ...
        basic_stat_table(:, 4) < 0.95);
    % Bug fix: the permutation has to cover ALL safe files. The previous
    % sort(rand(n_test, 1)) only shuffled 1:n_test, so the first n_test safe
    % files were always chosen. The count is also limited to the number of
    % safe files that actually exist.
    [~, tmp_index] = sort(rand(length(safe_index), 1));
    n_selected = min(n_test, length(safe_index));
    selection_index = tmp_index(1:n_selected);
    for kk = 1:n_selected
        id = safe_index(selection_index(kk));
        [x, fs] = audioread([original_speech_dir dir_list(ii).name '/' ...
            tmp_files(id).name ]);
        % the generators are re-seeded before analysis and before synthesis,
        % so their results do not depend on the selection step above
        if isOctave
            eval(command1);
            eval(command2);
        else
            rng(12345); % initialize frozen random number
        end;
        f0raw = MulticueF0v14(x,fs);
        ap = exstraightAPind(x,fs,f0raw);
        n3sgram=exstraightspec(x,f0raw,fs);
        if isOctave
            eval(command1);
            eval(command2);
        else
            rng(12345); % initialize frozen random number
        end;
        y = exstraightsynth(f0raw,n3sgram,ap,fs);
        disp([num2str(kk) ': ' tmp_files(id).name ' at:' datestr(now)]);
        audiowrite([target_wave_dir '/' tmp_files(id).name], ...
            y / max(abs(y)) * 0.9, fs);
        % name(1:end-4) strips the '.wav' extension
        path_name_f0 = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'f0.bin'];
        path_name_ap = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'ap.bin'];
        path_name_sp = [target_analysis_dir '/' tmp_files(id).name(1:end-4) 'sp.bin'];
        WriteBinaryData(path_name_f0, f0raw)
        WriteBinaryData(path_name_ap, ap)
        WriteBinaryData(path_name_sp, n3sgram)
    end;
end;

--------------------------------------------------------------------------------
/src/smax.m:
--------------------------------------------------------------------------------
function y=smax(x,a,b)
% y=smax(x,a,b)
%   Logistic curve 1/(1+exp(-a*(x-b))) shifted and scaled so that
%   smax(0,a,b) = 0 and smax(1,a,b) = 1.
%   x : evaluation points (element-wise)
%   a : slope of the logistic curve
%   b : position of its midpoint

y0=1.0/(1+exp(-a*(0-b))); % raw logistic value at x = 0
y1=1.0/(1+exp(-a*(1-b))); % raw logistic value at x = 1
y=(1.0./(1+exp(-a*(x-b)))-y0)/(y1-y0);

--------------------------------------------------------------------------------
/src/specreshape.m:
--------------------------------------------------------------------------------
function n2sgram3=specreshape(fs,n2sgram,eta,pc,mag,f0,imgi)
% Spectral compensation using Time Domain technique
%   n2sgram3=specreshape(fs,n2sgram,eta,pc,mag,f0);
%   fs : sampling frequency (Hz)
%   n2sgram : Straight smoothed spectrogram (optimum smoother is assumed)
%   eta : temporal stretch factor
%   pc : power exponent for nonlinearity
%   mag : magnification factor of Time Domain compensation
%   f0 : fundamental frequency (Hz)
%   imgi : display indicator, 1: display on (default), 0: off
%
%   n2sgram3 has the same size as n2sgram; negative values are clipped to
%   zero and 0.1 is added to every element.

% coded by Hideki Kawahara
% 13/Aug./1997
% 08/Dec./2002
% Note: This part may be redundant. It is better to
% evaluate contribution of this part again. (08/Dec./2002)
% 10/Aug./2005 modified by Takahashi on waitbar
% 10/Sept./2005 modified by Kawahara on waitbar

if nargin<7; imgi=1; end;%10/Sept./2005 (display is on unless imgi is given)
[nn,mm]=size(n2sgram);
fftl=(nn-1)*2;       % full FFT length implied by the nn one-sided bins
fbb=1:nn;            % one-sided bins kept in the output
rbb=(nn-1:-1:2);     % interior bins in reverse order, for mirroring
rbb2=(fftl:-1:nn+1); % upper-half positions, in reverse order
bb3=(2:nn-1);        % lower-half positions copied onto rbb2
n2sgram3=n2sgram*0;

ovc=optimumsmoothing(eta,pc);
hh=[1 1 1 1; 0 1/2 2/3 3/4; 0 0 1/3 2/4; 0 0 0 1/4];
%%bb=inv(hh)*ovc;
bb=hh \ ovc;
tt=((0:fftl-1))'/fs;
pb2=(pi/(eta^2)+(pi^2)/3*(bb(1)+4*bb(2)+9*bb(3)+16*bb(4)))*tt.^2;

if imgi==1; hpg=waitbar(0,'time domain spectral compensation of windowing effects'); end; % 08/Dec./2002%10/Aug./2005
for ii=1:mm
    % mirror the one-sided slice into a symmetric sequence of length fftl
    ffs=[n2sgram(:,ii);n2sgram(rbb,ii)];
    % weight the transformed slice; the gain grows with tt.^2 and f0^2 and
    % is capped at 20
    ccs2=real(fft(ffs)).*min(20,(1+mag*pb2*f0(ii)^2));
    ccs2(rbb2)=ccs2(bb3); % restore symmetry after the one-sided weighting
    ngg=real(ifft(ccs2));
    n2sgram3(:,ii)=ngg(fbb);
    if imgi==1 && rem(ii,20)==0;%10/Aug./2005
        waitbar(ii/mm);% 08/Dec./2002
    end;
end;
if imgi==1; close(hpg); end; % 08/Dec./2002%10/Aug./2005

%n2sgram3=(abs(n2sgram3)+n2sgram3)/2;
% half-wave rectification followed by a constant offset of 0.1
n2sgram3=(abs(n2sgram3)+n2sgram3)/2+0.1;

--------------------------------------------------------------------------------
/src/straight.m:
--------------------------------------------------------------------------------
% Starter command for GUI-STRAIGHT

straightCIv1 GUIinitialize;

-------------------------------------------------------------------------------- /src/straightBodyC03ma.m: -------------------------------------------------------------------------------- 1 | function [n2sgram,nsgram]=straightBodyC03ma(x,fs,shiftm,fftl,f0raw,f0var,f0varL,eta,pc,imgi) 2 | % [n2sgram,nsgram]=straightBodyC03ma(x,fs,shiftm,fftl,f0raw,f0var,f0varL,eta,pc,imgi) 3 | % n2sgram : smoothed spectrogram 4 | % nsgram : isometric spectrogram 5 | % x : input waveform 6 | % fs : sampling frequency (Hz) 7 | % shiftm : frame shift (ms) 8 | % fftl : length of FFT 9 | % f0raw : Pitch information to gude analysis (TEMPO) assumed 10 | % f0var : expected f0 variance including zerocross information 11 | % f0varL : expected f0 variance 12 | % eta : 13 | % pc : 14 | % imgi : display indicator 1: display on (default), 0: off 15 | 16 | % f0shiftm : frame shift (ms) for F0 analysis 17 | 18 | % STRAIGHT body: Interporation using adaptive gaussian weighting 19 | % and 2-dimensional Bartlett window 20 | % by Hideki Kawahara 21 | % 02/July/1996 22 | % 07/July/1996 23 | % 07/Sep./1996 24 | % 09/Sep./1996 guiding F0 information can be coarse 25 | % 14/Oct./1996 correction for over smoothing 26 | % 19/Oct./1996 Alternating Gaussian Correction 27 | % 01/Nov./1996 Temporal integration using Fluency theory (didn't work) 28 | % 03/Nov./1996 Temporal integration using Fluency theory 29 | % 25/Dec./1996 Quasi optimum smooting 30 | % 01/Feb./1997 Minimum variance analysis 31 | % 03/Feb./1997 Clean up 32 | % 08/Feb./1997 Fine tuning for onset enhancement 33 | % 13/Feb./1997 another fine temporal structure 34 | % 16/Feb./1997 better alternating Gaussian 35 | % 21/Feb./1997 no need for temporal interpolation! 36 | % 19/June/1997 Control of Analysis Paramters 37 | % 21/July/1997 Discard of optimum comp. 
and introduction TD compensation 38 | % 11/Aug./1997 Re-installation of temporal smooting 39 | % 08/Feb./1998 debug and speed up using closed form 40 | % 22/April/1999 Compatible with new F0 extraction routine 41 | % 31/March/2002 modified for ICSLP2002 42 | % 03/Feb./2003 Bug fix in the modification on 31/March/2002 43 | % 10/Aug./2005 modified by Takahashi on waitbar 44 | % 10/Sept./2005 modified by Kawahara on waitbar 45 | % 05/Oct./2005 bug fix on smoothing (both in time and frequency) 46 | % 04/July/2006 bug fix on compensatory time window 47 | 48 | if nargin==9; imgi=1; end; % 10/Sept./2005 49 | f0l=f0raw(:) + 0 * f0var + 0 * f0varL; % + 0 * f0var + 0 * f0varL are dummy 50 | framem=80; 51 | framel=round(framem*fs/1000); 52 | if fftl0),[wGaussian zeros(1,length(tt))]); 100 | wPSGSeed = wPSGSeed/max(wPSGSeed); 101 | [~,maxLocation] = max(wPSGSeed); 102 | tNominal = ((1:length(wPSGSeed))-maxLocation)/fs; 103 | %---- end of bug fix 104 | 105 | ttm=[0.00001 1:fftl/2 -fftl/2+1:-1]/fs; 106 | 107 | lft=1.0./(1+exp(-(abs((1:fftl)-fftl/2-1)-fftl/30)/2)); % safeguard 05/Oct./2005 by HK 108 | 109 | if imgi==1; hpg=waitbar(0,'F0 adaptive time-frequency analysis.'); end;% 10/Aug./2005 110 | for ii=1:nframe 111 | if imgi==1 && rem(ii,10)==0 % 10/Aug./2005 112 | waitbar(ii/nframe); 113 | end; 114 | f0=f0l(max(1,ii)); 115 | if f0==0 116 | f0=160; % 09/Sept./1999 117 | end; 118 | 119 | f0x(ii)=f0; 120 | t0=1/f0; 121 | 122 | %wxe = interp1q(tNominal',wPSGSeed',tt'*f0/fNominal)'; %bug fix 04/July/2006 123 | wxe = interp1(tNominal',wPSGSeed',tt'*f0/fNominal,'linear','extrap')'; 124 | wxe(isnan(wxe))=zeros(size(wxe(isnan(wxe)))); 125 | wxe=wxe/sqrt(sum(wxe.^2)); 126 | wxd=bcf*wxe.*sin(pi*tt/t0); 127 | 128 | iix=round(ist:ist+framel-1); 129 | pw=sqrt(abs(fft((tx(iix)-mean(tx(iix))).*wxe,fftl)).^2+ ... 130 | abs(fft((tx(iix)-mean(tx(iix))).*wxd,fftl)).^2).^pc; 131 | 132 | nsgram(:,ii)=pw(bbase)'; 133 | f0p2=floor((f0/fs*fftl)/2+1); % modified by H.K. 
on 3/Feb./2003 134 | f0p=ceil((f0/fs*fftl)+1); % modified by H.K. on 3/Feb./2003 135 | f0pr=f0/fs*fftl+1; % added by H.K. on 3/Feb./2003 136 | tmppw=interp1(1:f0p,pw(1:f0p),f0pr-((1:f0p2)-1)); % added by H.K. on 3/Feb./2003 137 | pw(1:f0p2)=tmppw; % modified by H.K. on 3/Feb./2003 138 | pw(fftl:-1:fftl-f0p2+2)=pw(2:f0p2); 139 | 140 | % local level equalization 141 | ww2t=(sin(ttm/(t0/3)*pi)./(ttm/(t0/3)*pi)).^2; 142 | spw2=real(ifft(ww2t.*fft(pw).*lft)); 143 | spw2(spw2==0)=spw2(spw2==0)+eps; %%% safe guard added on 15/Jan./2003 144 | 145 | % Optimum weighting 146 | wwt=(sin(ttm/t0*pi)./(ttm/t0*pi)).^2.*(ovc(1)+ovc(2)*2*cos(ttm/t0*2*pi) ... 147 | +ovc(3)*2*cos(ttm/(t0/2)*2*pi)); 148 | spw=real(ifft(wwt.*fft(pw./spw2)))/wwt(1); 149 | 150 | % smooth half wave rectification 151 | n2sgram(:,ii) = (spw2(bbase).*(0.25*(log(2*cosh(spw(bbase)*4/1.4))*1.4+spw(bbase)*4)/2))'; 152 | 153 | ist=ist+shiftl; 154 | end; 155 | if imgi==1; close(hpg); end; % added 06/Dec./2002% 10/Aug./2005 156 | if imgi==1; fprintf('\n'); end;% 10/Aug./2005 157 | 158 | nsgram=nsgram.^(1/pc); 159 | n2sgram=n2sgram.^(2/pc); 160 | 161 | %----------------------------------------------------- 162 | % Dirty hack for controling time constant in 163 | % unvoiced part analysis 164 | %----------------------------------------------------- 165 | if imgi==1; hpg=waitbar(0,'spline-based F0 adaptive smooting'); end;% 10/Aug./2005 166 | ttlv=sum(sum(n2sgram)); 167 | ncw=round(2*fs/1000); 168 | 169 | lbb=round(300/fs*fftl); % 22/Sept./1999 170 | 171 | h3=(conv(hanning(round(fs/1000)),exp(-1400/fs*(0:ncw*2)))); % 30/July/1999 172 | pwc=fftfilt(h3,abs([xh2, zeros(1,ncw*10)]).^2); % 30/July/1999, % 08/Sept./1999 173 | if imgi==1; waitbar(0.1); end; % 08/Dec./2002% 10/Aug./2005 174 | pwc=pwc(round(1:fs/(1000/shiftm):length(pwc))); 175 | [nn,mm]=size(n2sgram); 176 | pwc=pwc(1:mm); 177 | pwc=pwc/sum(pwc)*sum(sum(n2sgram(lbb:nn,:))); 178 | if imgi==1; waitbar(0.2); end; % 08/Dec./2002% 10/Aug./2005 179 | 180 | 
pwch=fftfilt(h3,abs([xhh, zeros(1,ncw*10)]).^2);% 30/July/1999 181 | if imgi==1; waitbar(0.3); end; % 08/Dec./2002% 10/Aug./2005 182 | pwch=pwch(round(1:fs/(1000/shiftm):length(pwch))); 183 | [~,mm]=size(n2sgram); 184 | pwch=pwch(1:mm); 185 | pwch=pwch/sum(pwch)*ttlv; 186 | 187 | ipwm=7; % impact detection window size 188 | ipl=round(ipwm/shiftm); 189 | ww=hanning(ipl*2+1); 190 | ww=ww/sum(ww); 191 | apwt=fftfilt(ww,[pwch(:)' zeros(1,length(ww)*2)]); 192 | apwt=apwt((1:length(pwch))+ipl); 193 | dpwt=fftfilt(ww,[diff(pwch(:)').^2 zeros(1,length(ww)*2)]); 194 | dpwt=dpwt((1:length(pwch))+ipl); 195 | mmaa=max(apwt); 196 | apwt(apwt<=0)=apwt(apwt<=0)*0+mmaa; % bug fix 03/Sept./1999 197 | rr=(sqrt(dpwt)./apwt); 198 | lmbd=(1.0./(1+exp(-(sqrt(rr)-0.75)*20))); 199 | 200 | pwc=pwc.*lmbd+(1-lmbd).*sum(n2sgram); % time constant controller 201 | 202 | % Shaping amplitude envelope 203 | for ii=1:mm 204 | if f0raw(ii)==0 205 | n2sgram(:,ii)=pwc(ii)*n2sgram(:,ii)/sum(n2sgram(:,ii)); 206 | end; 207 | if imgi==1 && rem(ii,10)==0% 10/Aug./2005 208 | waitbar(0.4+0.5*ii/mm); % 08/Dec./2002 209 | end; 210 | end; 211 | 212 | n2sgram=abs(n2sgram+0.0000000001); 213 | n2sgram=sqrt(n2sgram); 214 | if imgi==1; waitbar(1); end; % 08/Dec./2002% 10/Aug./2005 215 | if imgi==1; fprintf('\n'); end; 216 | if imgi==1; close(hpg); end; 217 | -------------------------------------------------------------------------------- /src/straightPanel98bak.m: -------------------------------------------------------------------------------- 1 | function fig = straightPanel98bak() 2 | % This is the machine-generated representation of a Handle Graphics object 3 | % and its children. Note that handle values may change when these objects 4 | % are re-created. This may cause problems with any callbacks written to 5 | % depend on the value of the handle at the time the object was saved. 6 | % 7 | % To reopen this object, just type the name of the M-file at the MATLAB 8 | % prompt. 
The M-file and its associated MAT-file must be on your path. 9 | 10 | load straightpanel98 11 | 12 | h0 = figure('Color',[0.8 0.8 0.8], ... 13 | 'Colormap',mat0, ... 14 | 'Position',[336 165 646 559], ... 15 | 'Tag','STRAIGHT control panel v.1'); 16 | h1 = uicontrol('Parent',h0, ... 17 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 18 | 'Position',[309 226 200 292], ... 19 | 'Style','frame', ... 20 | 'Tag','Frame2'); 21 | h1 = uicontrol('Parent',h0, ... 22 | 'BackgroundColor',[0.65 0.65 0.65], ... 23 | 'Position',[313 293 193 37], ... 24 | 'Style','frame', ... 25 | 'Tag','Frame5'); 26 | h1 = uicontrol('Parent',h0, ... 27 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 28 | 'Position',[309 32 330 189], ... 29 | 'Style','frame', ... 30 | 'Tag','Frame1'); 31 | h1 = uicontrol('Parent',h0, ... 32 | 'BackgroundColor',[0.8 0.8 0.8], ... 33 | 'FontName','Helvetica', ... 34 | 'FontSize',18, ... 35 | 'Position',[201 528 284 23], ... 36 | 'String','STRAIGHT control panel', ... 37 | 'Style','text', ... 38 | 'Tag','StaticText1');% font size 24->18 03/Sept./1999 39 | h1 = uicontrol('Parent',h0, ... 40 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 41 | 'Callback','clear all,close all', ... 42 | 'Position',[311 3 329 25], ... 43 | 'String','close', ... 44 | 'Tag','closebutton'); 45 | h1 = uicontrol('Parent',h0, ... 46 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 47 | 'Position',[516 226 123 291], ... 48 | 'Style','frame', ... 49 | 'Tag','Frame1'); 50 | h1 = uicontrol('Parent',h0, ... 51 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 52 | 'Callback','straightCIv1 peekvars', ... 53 | 'Position',[529 439 95 21], ... 54 | 'String','peek variables', ... 55 | 'Tag','peakbutton'); 56 | h1 = uicontrol('Parent',h0, ... 57 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 58 | 'Callback','straightCIv1 dispnsgram', ... 59 | 'Enable','off', ... 60 | 'Position',[515 173 112 20], ... 61 | 'String','adaptive spectrogram', ... 
62 | 'Tag','adaptivespecbtn'); 63 | h1 = uicontrol('Parent',h0, ... 64 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 65 | 'Callback','straightCIv1 dispnwsgram', ... 66 | 'Enable','off', ... 67 | 'Position',[517 141 111 20], ... 68 | 'String','a. wide spectrogram', ... 69 | 'Tag','widespecbtn'); 70 | h1 = uicontrol('Parent',h0, ... 71 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 72 | 'Callback','straightCIv1 dispn2sgrambk', ... 73 | 'Enable','off', ... 74 | 'Position',[320 171 159 20], ... 75 | 'String','smthd spectrogram', ... 76 | 'Tag','smoothedspecbtn'); 77 | h1 = uicontrol('Parent',h0, ... 78 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 79 | 'Callback','straightCIv1 dispn3sgram', ... 80 | 'Enable','off', ... 81 | 'Position',[321 108 158 20], ... 82 | 'String','rmvd spectrogram', ... 83 | 'Tag','removedspecbtn'); 84 | h1 = uicontrol('Parent',h0, ... 85 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 86 | 'Callback','straightCIv1 playsynth', ... 87 | 'Enable','off', ... 88 | 'Position',[320 45 145 20], ... 89 | 'String','Play synthesized', ... 90 | 'Tag','playsynthbtn'); 91 | h1 = uicontrol('Parent',h0, ... 92 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 93 | 'Callback','straightCIv1 playoriginal', ... 94 | 'Enable','off', ... 95 | 'Position',[483 44 148 20], ... 96 | 'String','Play original', ... 97 | 'Tag','playorgbtn'); 98 | h1 = uicontrol('Parent',h0, ... 99 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 100 | 'Callback','straightCIv1 savefile', ... 101 | 'Enable','off', ... 102 | 'Position',[329 236 113 22], ... 103 | 'String','save to file', ... 104 | 'Tag','savetobtn'); 105 | h1 = uicontrol('Parent',h0, ... 106 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 107 | 'Callback','straightCIv1 synthesize', ... 108 | 'Enable','off', ... 109 | 'Position',[424 266 80 22], ... 110 | 'String','synthesize', ... 111 | 'Tag','synthesizebtn'); 112 | h1 = uicontrol('Parent',h0, ... 
113 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 114 | 'Callback','straightCIv1 straightcore', ... 115 | 'Enable','off', ... 116 | 'Position',[424 334 79 22], ... 117 | 'String','analyze 1CHX', ... 118 | 'Tag','analyzespcbtn'); 119 | h1 = uicontrol('Parent',h0, ... 120 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 121 | 'Callback','straightCIv1 source', ... 122 | 'Enable','off', ... 123 | 'Position',[355 367 113 22], ... 124 | 'String','analyze source', ... 125 | 'Tag','analyzesrcbtn'); 126 | h1 = uicontrol('Parent',h0, ... 127 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 128 | 'Callback','straightCIv1 readfile', ... 129 | 'Position',[354 401 113 22], ... 130 | 'String','read from file', ... 131 | 'Tag','readfilebtn'); 132 | h1 = uicontrol('Parent',h0, ... 133 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 134 | 'Callback','straightCIv1 initialize', ... 135 | 'Position',[353 438 113 22], ... 136 | 'String','initialize', ... 137 | 'Tag','initializebtn'); 138 | h1 = uicontrol('Parent',h0, ... 139 | 'Position',[5 252 297 268], ... 140 | 'Style','frame', ... 141 | 'Tag','Frame1'); 142 | h1 = uicontrol('Parent',h0, ... 143 | 'Position',[160 427 125 20], ... 144 | 'String','sampling frequency Hz', ... 145 | 'Style','text', ... 146 | 'Tag','StaticText2'); 147 | h1 = uicontrol('Parent',h0, ... 148 | 'BackgroundColor',[0.85 0.85 0.85], ... 149 | 'Callback','straightCIv1 getfsmenu', ... 150 | 'Position',[170 411 108 20], ... 151 | 'String',[48000;44100;32000;24000;22050;20000;16000;12500;12000;11050;10000; 8000], ... 152 | 'Style','popupmenu', ... 153 | 'Tag','samplingfreqmenu', ... 154 | 'Value',5); 155 | h1 = uicontrol('Parent',h0, ... 156 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 157 | 'Position',[353 483 111 20], ... 158 | 'String','Procedures', ... 159 | 'Style','text', ... 160 | 'Tag','StaticText3'); 161 | h1 = uicontrol('Parent',h0, ... 162 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 
163 | 'Position',[431 194 74 20], ... 164 | 'String','Display', ... 165 | 'Style','text', ... 166 | 'Tag','StaticText4'); 167 | h1 = uicontrol('Parent',h0, ... 168 | 'BackgroundColor',[1 1 1], ... 169 | 'Callback',mat2, ... 170 | 'Position',[27 459 72 20], ... 171 | 'String','800', ... 172 | 'Style','edit', ... 173 | 'Tag','f0ceiledit'); 174 | h1 = uicontrol('Parent',h0, ... 175 | 'BackgroundColor',[1 1 1], ... 176 | 'Callback','straightCIv1 editf0floor', ... 177 | 'Position',[28 411 72 20], ... 178 | 'String','40', ... 179 | 'Style','edit', ... 180 | 'Tag','f0flooredit'); 181 | h1 = uicontrol('Parent',h0, ... 182 | 'Position',[102 407 32 20], ... 183 | 'String','Hz', ... 184 | 'Style','text', ... 185 | 'Tag','StaticText5'); 186 | h1 = uicontrol('Parent',h0, ... 187 | 'Position',[101 454 32 20], ... 188 | 'String','Hz', ... 189 | 'Style','text', ... 190 | 'Tag','StaticText5'); 191 | h1 = uicontrol('Parent',h0, ... 192 | 'Position',[20 431 94 20], ... 193 | 'String','F0 lower bound', ... 194 | 'Style','text', ... 195 | 'Tag','StaticText5'); 196 | h1 = uicontrol('Parent',h0, ... 197 | 'Position',[20 483 95 17], ... 198 | 'String','F0 higher bound', ... 199 | 'Style','text', ... 200 | 'Tag','StaticText5'); 201 | h1 = uicontrol('Parent',h0, ... 202 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 203 | 'Callback','straightCIv1 remove2ndstructue', ... 204 | 'Enable','off', ... 205 | 'Position',[425 302 77 20], ... 206 | 'String','remove 2nd', ... 207 | 'Tag','remove2ndbtn'); 208 | h1 = uicontrol('Parent',h0, ... 209 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 210 | 'Callback','straightCIv1 bandcorrbtn', ... 211 | 'Enable','off', ... 212 | 'Position',[319 334 85 20], ... 213 | 'String','analyze MBX', ... 214 | 'Tag','bandcorrbtn'); 215 | h1 = uicontrol('Parent',h0, ... 216 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 217 | 'Callback','straightCIv1 dispn2sgram', ... 218 | 'Enable','off', ... 219 | 'Position',[320 139 158 20], ... 
220 | 'String','enhanced spectrogram', ... 221 | 'Tag','dispn2sgrambtn'); 222 | h1 = uicontrol('Parent',h0, ... 223 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 224 | 'Callback','straightCIv1 synthesizegraded', ... 225 | 'Enable','off', ... 226 | 'Position',[320 266 86 21], ... 227 | 'String','synthsize grad', ... 228 | 'Tag','synthgradbtn'); 229 | h1 = uicontrol('Parent',h0, ... 230 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 231 | 'Callback','straightCIv1 disphhbspectrograme', ... 232 | 'Enable','off', ... 233 | 'Position',[321 77 158 20], ... 234 | 'String','cmpst spectrogram', ... 235 | 'Tag','cmpstspecgrambtn'); 236 | h1 = uicontrol('Parent',h0, ... 237 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 238 | 'Position',[15 293 278 99], ... 239 | 'Style','frame', ... 240 | 'Tag','Frame3'); 241 | h1 = uicontrol('Parent',h0, ... 242 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 243 | 'Position',[6 4 297 240], ... 244 | 'Style','frame', ... 245 | 'Tag','Frame4'); 246 | h1 = uicontrol('Parent',h0, ... 247 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 248 | 'Position',[24 366 89 15], ... 249 | 'String','FFT lngth', ... 250 | 'Style','text', ... 251 | 'Tag','StaticText6'); 252 | h1 = uicontrol('Parent',h0, ... 253 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 254 | 'Position',[121 362 75 20], ... 255 | 'String','w strtch in t', ... 256 | 'Style','text', ... 257 | 'Tag','StaticText7'); 258 | h1 = uicontrol('Parent',h0, ... 259 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 260 | 'Position',[210 362 71 20], ... 261 | 'String','pwr cnstnt', ... 262 | 'Style','text', ... 263 | 'Tag','StaticText8'); 264 | h1 = uicontrol('Parent',h0, ... 265 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 266 | 'Position',[36 322 72 20], ... 267 | 'String','mag. factor', ... 268 | 'Style','text', ... 269 | 'Tag','StaticText9'); 270 | h1 = uicontrol('Parent',h0, ... 271 | 'BackgroundColor',[1 1 1], ... 
272 | 'Callback','straightCIv1 fftledit', ... 273 | 'Position',[41 347 56 20], ... 274 | 'String','1024', ... 275 | 'Style','edit', ... 276 | 'Tag','fftledit'); 277 | h1 = uicontrol('Parent',h0, ... 278 | 'BackgroundColor',[1 1 1], ... 279 | 'Callback','straightCIv1 wndwstrtchedit', ... 280 | 'Position',[128 347 60 20], ... 281 | 'String','1.4', ... 282 | 'Style','edit', ... 283 | 'Tag','wndwstrtchedit'); 284 | h1 = uicontrol('Parent',h0, ... 285 | 'BackgroundColor',[1 1 1], ... 286 | 'Callback','straightCIv1 pwrcnstntedit', ... 287 | 'Position',[219 346 54 20], ... 288 | 'String','0.6', ... 289 | 'Style','edit', ... 290 | 'Tag','pwrcnstntedit'); 291 | h1 = uicontrol('Parent',h0, ... 292 | 'BackgroundColor',[1 1 1], ... 293 | 'Callback','straightCIv1 magfactoredit', ... 294 | 'Position',[42 307 56 20], ... 295 | 'String','0.2', ... 296 | 'Style','edit', ... 297 | 'Tag','magfactoredit'); 298 | h1 = uicontrol('Parent',h0, ... 299 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 300 | 'Callback','straightCIv1 pcnvslider', ... 301 | 'Min',-1, ... 302 | 'Position',[45 90 186 20], ... 303 | 'String','F0 conversion', ... 304 | 'Style','slider', ... 305 | 'Tag','pcnvslider'); 306 | h1 = uicontrol('Parent',h0, ... 307 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 308 | 'Callback','straightCIv1 fconvslider', ... 309 | 'Min',-1, ... 310 | 'Position',[45 50 187 20], ... 311 | 'Style','slider', ... 312 | 'Tag','fconvslider'); 313 | h1 = uicontrol('Parent',h0, ... 314 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 315 | 'Callback','straightCIv1 sconvslider', ... 316 | 'Min',-1, ... 317 | 'Position',[44 10 188 20], ... 318 | 'Style','slider', ... 319 | 'Tag','sconvslider'); 320 | h1 = uicontrol('Parent',h0, ... 321 | 'BackgroundColor',[1 1 1], ... 322 | 'Callback','straightCIv1 delfracedit', ... 323 | 'Position',[153 214 60 20], ... 324 | 'String','0.2', ... 325 | 'Style','edit', ... 326 | 'Tag','delfracedit'); 327 | h1 = uicontrol('Parent',h0, ... 
328 | 'BackgroundColor',[1 1 1], ... 329 | 'Callback','straightCIv1 pcnvedit', ... 330 | 'Position',[236 91 60 20], ... 331 | 'String','1', ... 332 | 'Style','edit', ... 333 | 'Tag','pcnvedit'); 334 | h1 = uicontrol('Parent',h0, ... 335 | 'BackgroundColor',[1 1 1], ... 336 | 'Callback','straightCIv1 fconvedit', ... 337 | 'Position',[237 50 60 20], ... 338 | 'String','1', ... 339 | 'Style','edit', ... 340 | 'Tag','fconvedit'); 341 | h1 = uicontrol('Parent',h0, ... 342 | 'BackgroundColor',[1 1 1], ... 343 | 'Callback','straightCIv1 sconvedit', ... 344 | 'Position',[237 11 60 20], ... 345 | 'String','1', ... 346 | 'Style','edit', ... 347 | 'Tag','sconvedit'); 348 | h1 = uicontrol('Parent',h0, ... 349 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 350 | 'Position',[26 215 122 20], ... 351 | 'String','relative tg dispersion', ... 352 | 'Style','text', ... 353 | 'Tag','StaticText10'); 354 | h1 = uicontrol('Parent',h0, ... 355 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 356 | 'Position',[27 192 121 20], ... 357 | 'String','absolute tg dispersion', ... 358 | 'Style','text', ... 359 | 'Tag','StaticText11'); 360 | h1 = uicontrol('Parent',h0, ... 361 | 'BackgroundColor',[1 1 1], ... 362 | 'Callback','straightCIv1 delspedit', ... 363 | 'Position',[154 191 60 20], ... 364 | 'String','2', ... 365 | 'Style','edit', ... 366 | 'Tag','delspedit'); 367 | h1 = uicontrol('Parent',h0, ... 368 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 369 | 'Position',[28 170 121 20], ... 370 | 'String','corner frequency', ... 371 | 'Style','text', ... 372 | 'Tag','StaticText11'); 373 | h1 = uicontrol('Parent',h0, ... 374 | 'BackgroundColor',[1 1 1], ... 375 | 'Callback','straightCIv1 cornfedit', ... 376 | 'Position',[154 169 60 20], ... 377 | 'String','3000', ... 378 | 'Style','edit', ... 379 | 'Tag','cornfedit'); 380 | h1 = uicontrol('Parent',h0, ... 381 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 382 | 'Position',[27 148 121 20], ... 
383 | 'String','tg smoothness', ... 384 | 'Style','text', ... 385 | 'Tag','StaticText11'); 386 | h1 = uicontrol('Parent',h0, ... 387 | 'BackgroundColor',[1 1 1], ... 388 | 'Callback','straightCIv1 gdbwedit', ... 389 | 'Position',[154 148 60 20], ... 390 | 'String','70', ... 391 | 'Style','edit', ... 392 | 'Tag','gdbwedit'); 393 | h1 = uicontrol('Parent',h0, ... 394 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 395 | 'Position',[70 112 121 20], ... 396 | 'String','F0 conversion', ... 397 | 'Style','text', ... 398 | 'Tag','StaticText11'); 399 | h1 = uicontrol('Parent',h0, ... 400 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 401 | 'Position',[47 70 161 20], ... 402 | 'String','frequency axis conversion', ... 403 | 'Style','text', ... 404 | 'Tag','StaticText11'); 405 | h1 = uicontrol('Parent',h0, ... 406 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 407 | 'Position',[49 29 156 20], ... 408 | 'String','temporal axis conversion', ... 409 | 'Style','text', ... 410 | 'Tag','StaticText11'); 411 | h1 = uicontrol('Parent',h0, ... 412 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 413 | 'Position',[221 167 31 20], ... 414 | 'String','Hz', ... 415 | 'Style','text', ... 416 | 'Tag','StaticText11'); 417 | h1 = uicontrol('Parent',h0, ... 418 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 419 | 'Position',[220 190 35 20], ... 420 | 'String','ms', ... 421 | 'Style','text', ... 422 | 'Tag','StaticText11'); 423 | h1 = uicontrol('Parent',h0, ... 424 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 425 | 'Position',[221 147 31 20], ... 426 | 'String','Hz', ... 427 | 'Style','text', ... 428 | 'Tag','gdbwtxt'); 429 | h1 = uicontrol('Parent',h0, ... 430 | 'BackgroundColor',[1 1 1], ... 431 | 'Callback','straightCIv1 tpathedit', ... 432 | 'Position',[21 259 268 20], ... 433 | 'String','hmac117_HD:MATLAB 5:', ... 434 | 'Style','edit', ... 435 | 'Tag','tpathedit'); 436 | h1 = uicontrol('Parent',h0, ... 
437 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 438 | 'Callback','straightCIv1 delfracradio', ... 439 | 'Position',[251 213 40 20], ... 440 | 'Style','radiobutton', ... 441 | 'Tag','delfracradio'); 442 | h1 = uicontrol('Parent',h0, ... 443 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 444 | 'Callback','straightCIv1 delspradio', ... 445 | 'Position',[252 191 42 20], ... 446 | 'Style','radiobutton', ... 447 | 'Tag','delspradio', ... 448 | 'Value',1); 449 | h1 = uicontrol('Parent',h0, ... 450 | 'Position',[145 481 147 20], ... 451 | 'String','original sound file', ... 452 | 'Style','text', ... 453 | 'Tag','StaticText12'); 454 | h1 = uicontrol('Parent',h0, ... 455 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 456 | 'Position',[523 487 111 20], ... 457 | 'String','AUX', ... 458 | 'Style','text', ... 459 | 'Tag','StaticText3'); 460 | h1 = uicontrol('Parent',h0, ... 461 | 'BackgroundColor',[1 1 1], ... 462 | 'Position',[144 461 149 20], ... 463 | 'String','none', ... 464 | 'Style','edit', ... 465 | 'Tag','soundfilename'); 466 | h1 = uicontrol('Parent',h0, ... 467 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 468 | 'Callback','straightCIv1 bypassbtn', ... 469 | 'Enable','off', ... 470 | 'Position',[320 302 82 20], ... 471 | 'String','bypass', ... 472 | 'Tag','bypassbtn'); 473 | h1 = uicontrol('Parent',h0, ... 474 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 475 | 'Callback','straightCIv1 upsamplebtn', ... 476 | 'Position',[446 237 60 20], ... 477 | 'String','up ENBL', ... 478 | 'Style','radiobutton', ... 479 | 'Tag','upsamplebtn'); 480 | h1 = uicontrol('Parent',h0, ... 481 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 482 | 'Callback','straightCIv1 resetparamsbtn', ... 483 | 'Position',[531 243 92 20], ... 484 | 'String','reset parameters', ... 485 | 'Tag','resetparamsbtn'); 486 | h1 = uicontrol('Parent',h0, ... 487 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 488 | 'ButtonDownFcn','straightCIv1 F0NLbtn', ... 
489 | 'Position',[12 91 28 20], ... 490 | 'String','NL', ... 491 | 'Tag','F0NLbtn'); 492 | h1 = uicontrol('Parent',h0, ... 493 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 494 | 'Callback','straightCIv1 FqNLbtn', ... 495 | 'Position',[12 50 28 20], ... 496 | 'String','NL', ... 497 | 'Tag','FqNLbtn'); 498 | h1 = uicontrol('Parent',h0, ... 499 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 500 | 'ButtonDownFcn','straightCIv1 txNLbtn', ... 501 | 'Position',[11 11 28 20], ... 502 | 'String','NL', ... 503 | 'Tag','txNLbtn'); 504 | h1 = uicontrol('Parent',h0, ... 505 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 506 | 'Position',[119 322 80 20], ... 507 | 'String','frame rate(ms)', ... 508 | 'Style','text', ... 509 | 'Tag','StaticText13'); 510 | h1 = uicontrol('Parent',h0, ... 511 | 'BackgroundColor',[1 1 1], ... 512 | 'Callback','straightCIv1 editshiftm', ... 513 | 'Position',[129 306 60 20], ... 514 | 'Style','edit', ... 515 | 'Tag','shiftmedit'); 516 | h1 = uicontrol('Parent',h0, ... 517 | 'Callback','straightCIv1 bininputformat', ... 518 | 'Position',[532 394 89 20], ... 519 | 'String','PC/Alpha (little-endian)|Sun/Mac (big-endian)', ... 520 | 'Style','popupmenu', ... 521 | 'Tag','bininputformat', ... 522 | 'Value',1); 523 | h1 = uicontrol('Parent',h0, ... 524 | 'Callback','straightCIv1 binoutputformat', ... 525 | 'Position',[532 351 87 22], ... 526 | 'String','PC/Alpha (little-endian)|Sun/Mac (big-endian)', ... 527 | 'Style','popupmenu', ... 528 | 'Tag','binoutputformat', ... 529 | 'Value',1); 530 | h1 = uicontrol('Parent',h0, ... 531 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 532 | 'Position',[522 417 108 16], ... 533 | 'String','Bin.IN format', ... 534 | 'Style','text', ... 535 | 'Tag','StaticText14'); 536 | h1 = uicontrol('Parent',h0, ... 537 | 'BackgroundColor',[0.733333 0.733333 0.733333], ... 538 | 'Position',[522 375 108 17], ... 539 | 'String','Bin.OUT format', ... 540 | 'Style','text', ... 
541 | 'Tag','StaticText15'); 542 | if nargout > 0, fig = h0; end 543 | -------------------------------------------------------------------------------- /src/straightSynthTB06.m: -------------------------------------------------------------------------------- 1 | function sy=straightSynthTB06(n2sgram,f0raw,f0var,f0varL,shiftm,fs, ... 2 | pcnv,fconv,sconv,gdbw,delfrac,delsp,cornf,delfracind); 3 | % Straight synthesis with all-pass filter design based on 4 | % TEMPO analysis result 5 | % sy=straightSynthTB06(n2sgram,f0raw,f0var,f0varL,shiftm,fs, ... 6 | % pcnv,fconv,sconv,gdbw,delfrac,delsp,cornf,delfracind); 7 | % sy : synthesized speech 8 | % n2sgram : amplitude spectrogram 9 | % f0raw : pitch pattern (Hz) 10 | % f0var : expected F0 variation with fricative modification 11 | % f0varL : expected F0 variation 12 | % shiftm : frame shift (ms) for spectrogram 13 | % fs : sampling frequency (Hz) 14 | % pcnv : pitch stretch factor 15 | % fconv : frequency stretch factor (scalar), or a vector with size(n2sgram,1) elements that is used directly as the frequency index table 16 | % sconv : speaking duration stretch factor 17 | % gdbw : finest resolution in group delay (Hz) 18 | % delfrac : ratio of standard deviation of group delay in terms of F0 19 | % delsp : standard deviation of group delay (ms) 20 | % cornf : lower corner frequency for phase randomization (Hz) 21 | % delfracind : selector of fixed and proportional group delay (when nonzero, delsp is recomputed as delfrac*1000/f0) 22 | 23 | 24 | % Straight synthesis with all-pass filter design 25 | % by Hideki Kawahara 26 | % (c) ATR Human Info. Proc. Res. Labs. 1996 27 | % 07/July/1996 28 | % 12/Aug./1996 29 | % 22/Aug./1996 30 | % 06/Sep./1996 BUG FIX!!! wrong sign 31 | % 07/Sep./1996 converted to function script 32 | % 09/Sep./1996 coarse F0 information is possible 33 | % 16/Sep./1996 tolerant to F0 extraction errors 34 | % 02/Nov./1996 Now pitch extraction is perfect. No need for the hack.
35 | % 02/Feb./1997 Without V/UV discrimination 36 | % 08/June/1999 minor bug fix 37 | 38 | f0l=f0raw; 39 | [nii,njj]=size(n2sgram); 40 | fftl=nii+nii-2; 41 | fftl2=fftl/2; 42 | if length(fconv)==1 43 | idcv=min([0:fftl/2]/fconv+1,fftl/2+1); % frequency stretch conversion table 44 | elseif length(fconv)==nii 45 | idcv=fconv(:)'; 46 | end; % NOTE(review): there is no else branch, so idcv stays undefined for any other fconv length 47 | sy=zeros([round((njj*shiftm/1000*fs)*sconv+3*fftl+1),1]); 48 | syo=sy; 49 | 50 | mixVhigh=sqrt(0.25./(f0var+0.25)); 51 | mixNhigh=sqrt(1-0.25./(f0var+0.25)); 52 | 53 | mixVlow=sqrt(0.25./(f0varL+0.25)); 54 | mixNlow=sqrt(1-0.25./(f0varL+0.25)); 55 | 56 | phs=fractpitch2(fftl); % phs will have smooth phase function for unit delay 57 | 58 | a=([0:fftl2-1,0,-(fftl2-1:-1:1)])/fftl2; 59 | sz=a'*pi; 60 | ta=[0:fftl2-1]/fftl2/2*2*pi; 61 | t=[ta,0,-ta(fftl2:-1:2)]; 62 | 63 | fftl2=fftl/2; 64 | nsyn=length(sy); 65 | idx=1; 66 | bb=1:fftl; 67 | bb2=1:fftl2; 68 | rbb2=fftl/2:-1:2; 69 | 70 | %------- shaping for low-frequency noise suppression 71 | 72 | fxa=(0:fftl2)/fftl*fs; 73 | f0tmp=f0l.*(mixVlow>0.8); 74 | lowcutf=mean(f0tmp(f0tmp>0))*0.7*pcnv; 75 | %lowcutfav=mean(f0l(f0l>0))*0.8; 76 | lowcutfav=lowcutf; 77 | %wlcutav=1.0./(1+exp(-5*(fxa-lowcutfav)/(lowcutfav/3))); 78 | wlcutav=1.0./(1+exp(-14*(fxa-lowcutfav)/(lowcutfav/1))); 79 | %keyboard; 80 | 81 | 82 | %------- parameters for noise-based apf design 83 | 84 | t=([1:fftl]-fftl/2-1)/fftl*2; 85 | adjd=1.0./(1+exp(-20*t)); % correction function for smooth transition at fs/2 86 | gw=exp(-0.25*pi*(fs*(t/2)/gdbw).^2); % slope definition function 87 | gw=gw/sum(gw); % gdbw is the equivalent rectangular bandwidth 88 | fgw=real(fft(fftshift(gw))); % gw is the spectral smoothing window 89 | df=fs/fftl*2*pi; % normalization constant for integration and differentiation 90 | fw=(1:fftl2+1)/fftl*fs; % frequency axis 91 | 92 | trbw=300; % width of transition area 93 | rho=1.0./(1+exp(-(fw-cornf)/trbw)); % random group delay weighting function 94 | 95 | [snn,smm]=size(n2sgram); 96 | fqx=(0:snn-1)/snn*fs/2;
97 | chigh=1.0./(1+exp(-(fqx-600)/100))'; 98 | clow=1.0-chigh; 99 | 100 | f0arc=0; 101 | lft=1-hanning(fftl); 102 | lft=1.0./(1+exp(-(lft-0.5)*60)); 103 | ww=1.0./(1+exp(-(hanning(fftl)-0.3)*23)); % lifter for 104 | 105 | iin=1; 106 | dmx=max(max(n2sgram)); 107 | while (idx < nsyn-fftl-10) & (ceil(iin) 1000000 122 | ccp2=[ccp(1);2*ccp(2:fftl/2);0*ccp(fftl/2+1:fftl)]; 123 | ffx=(fft(ccp2.*lft)/fftl); 124 | nidx=round(idx); 125 | 126 | 127 | % wlcut=1.0./(1+exp(-20*(fxa-lowcutf)/lowcutf)); 128 | nf0=fs/f0; 129 | frt=idx-nidx; 130 | frtz=exp(i*phs*frt)'; % This was in a wrong sign! 131 | 132 | nz=randn(1,fftl2+1).*((rho*0+1)*mixNlow(round(ii))+(1-mixNlow(round(ii)))*rho); 133 | nz=real(ifft(fft([nz,nz(rbb2)]).*fgw)); 134 | nz=nz*sqrt(fftl*gdbw/fs); % correction factor for noise 135 | if delfracind, delsp=delfrac*1000/f0; end; 136 | nz=nz*delsp*df/1000; 137 | mz=cumsum([nz(1:fftl2+1),nz(rbb2)])-nz(1); 138 | mmz=-(mz-adjd*(rem((mz(fftl)+mz(2)),2*pi)-2*pi)); 139 | pz=exp(-i*mmz)'; %.*[wlcut wlcut(rbb2)]'; 140 | 141 | tx=fftshift(real(ifft(exp(ffx).*pz.*frtz.*[mix;mix(rbb2)]))).*ww; 142 | % tx=fftshift(real(ifft(ff.*pz.*frtz.*[mix;mix(rbb2)]))).*ww; 143 | sy(bb+nidx)=sy(bb+nidx)+tx*sqrt(nf0); 144 | % if abs(round(ii)-90)<10 145 | % keyboard; 146 | % end; 147 | 148 | idx=idx+nf0; 149 | iin=min(length(f0l),idx/fs*1000/shiftm/sconv+1); 150 | if (mixVlow(round(ii))<0.8) & (mixVlow(round(iin))>0.8) 151 | idxo=idx; 152 | ipos=min(find(mixVlow(round(ii:iin))>0.8))-1+ii; 153 | if length(ipos)==0 154 | idx=idxo; 155 | else 156 | idx=max(idxo-nf0+1,(ipos-1)*fs/1000*shiftm*sconv); 157 | end; 158 | end; 159 | % disp([idx,iin]) 160 | end; 161 | %sy=sy*0; 162 | ii=1; 163 | idx=1; 164 | f0=500; 165 | f0=1000; % overrides the 500 assigned just above 166 | %wlcutfric=1.0./(1+exp(-14*(fxa-lowcutfav*2)/(lowcutfav))); 167 | wlcutfric=1.0./(1+exp(-14*(fxa-lowcutfav)/(lowcutfav))); % 31/July/1999 168 | 169 | while (idx < nsyn-fftl) & (ii0.03 173 | mix=mixNlow(ii)*clow(round(idcv(:)))+mixNhigh(ii)*chigh(round(idcv(:))); 174 |
ff=[n2sgram(round(idcv(:)),ii);n2sgram(round(idcv(rbb2)),ii)]; 175 | % ff=ff.*[wlcut wlcut(rbb2)]'; 176 | ff=ff.*[wlcutfric wlcutfric(rbb2)]'; 177 | % ccp=real(fft(log(ff+0.001))); % 23rd July, 1999 178 | ccp=real(fft(log(ff+dmx/100000))); % 23rd July, 1999 % 24th Sept. 1999 179 | ccp2=[ccp(1);2*ccp(2:fftl/2);0*ccp(fftl/2+1:fftl)]; 180 | ffx=(fft(ccp2.*lft)/fftl); 181 | nf0=fs/f0; 182 | 183 | %============= deleted on 18/July/1999 ====== 184 | % if f0l(ii) > 0 185 | % f0x=lowcutf; % f0l(ii)*pcnv; 186 | % f0x=f0l(ii)*pcnv; 187 | % wlcut=1.0./(1+exp(-20*(fxa-f0x*0.8)/lowcutf)); 188 | % wlcut=wlcutav; 189 | % tx=fftshift(real(ifft(exp(ffx).*[wlcut.*mix' wlcut(rbb2).*mix(rbb2)']'))); 190 | % else 191 | % tx=fftshift(real(ifft(exp(ffx).*[wlcutav.*mix' wlcutav(rbb2).*mix(rbb2)']'))); 192 | % end; 193 | tx=fftshift(real(ifft(exp(ffx)))); 194 | %============= end of modification on 18/July/1999 ==== 195 | 196 | rx=randn([round(nf0),1]); 197 | tnx=fftfilt(rx,tx); 198 | sy(bb+nidx)=sy(bb+nidx)+tnx(bb).*ww; 199 | end; 200 | idx=idx+nf0; 201 | ii=min(length(f0l),idx/fs*1000/shiftm/sconv+1); 202 | end; 203 | 204 | sy2=sy(fftl/2+(1:round((njj*shiftm/1000*fs)*sconv))); 205 | 206 | lowcutf=70; 207 | if lowcutf <70 208 | lowcutf=70; 209 | end; 210 | %[b,a]=butter(5,lowcutf/fs*2,'high'); 211 | %sy=filter(b,a,sy2); 212 | sy=sy2; % the high-pass filter above is commented out, so the trimmed signal is returned unfiltered 213 | 214 | 215 | -------------------------------------------------------------------------------- /src/straightSynthTB07ca.m: -------------------------------------------------------------------------------- 1 | function [sy,synthSataus]=straightSynthTB07ca(n2sgram,f0raw,shiftm,fs, ... 2 | pcnv,fconv,sconv,gdbw,delfrac,delsp,cornf,delfracind,ap,imap,imgi,lowestF0) 3 | % Straight synthesis with all-pass filter design based on 4 | % TEMPO analysis result 5 | % [sy,synthSataus]=straightSynthTB07ca(n2sgram,f0raw,shiftm,fs, ...
6 | % pcnv,fconv,sconv,gdbw,delfrac,delsp,cornf,delfracind,ap,imap,imgi,lowestF0); 7 | % sy : synthesized speech; the optional second output synthSataus receives the status message ('ok' unless a warning was recorded) 8 | % n2sgram : amplitude spectrogram 9 | % f0raw : pitch pattern (Hz) 10 | % f0var : expected F0 variation with fricative modification (not an argument of this function; listed in the header only) 11 | % f0varL : expected F0 variation (not an argument of this function; listed in the header only) 12 | % shiftm : frame shift (ms) for spectrogram 13 | % fs : sampling frequency (Hz) 14 | % pcnv : pitch stretch factor 15 | % fconv : frequency stretch factor (scalar), or a vector with size(n2sgram,1) elements that is used directly as the frequency index table 16 | % sconv : speaking duration stretch factor (overridden when length(imap) > 1) 17 | % gdbw : finest resolution in group delay (Hz) 18 | % delfrac : ratio of standard deviation of group delay in terms of F0 19 | % delsp : standard deviation of group delay (ms) 20 | % cornf : lower corner frequency for phase randomization (Hz) 21 | % delfracind : selector of fixed and proportional group delay (when nonzero, delsp is recomputed as delfrac*1000/f0) 22 | % ap : aperiodicity measure (converted to amplitude with 10.^(ap/20)) 23 | % imap : arbitrary mapping from new time (sample) to old time (frame) 24 | % imgi : display indicator, 1: display on (default), 0: off 25 | % lowestF0 : lower limit of the resynthesized fundamental frequency (Hz); set to 50 when fewer than 16 arguments are given 26 | 27 | % Straight synthesis with all-pass filter design 28 | % by Hideki Kawahara 29 | % (c) ATR Human Info. Proc. Res. Labs. 1996 30 | % 07/July/1996 31 | % 12/Aug./1996 32 | % 22/Aug./1996 33 | % 06/Sep./1996 BUG FIX!!! wrong sign 34 | % 07/Sep./1996 converted to function script 35 | % 09/Sep./1996 coarse F0 information is possible 36 | % 16/Sep./1996 tolerant to F0 extraction errors 37 | % 02/Nov./1996 Now pitch extraction is perfect. No need for the hack.
38 | % 02/Feb./1997 Without V/UV discrimination 39 | % 08/June/1999 minor bug fix 40 | 41 | % 03/Sep./1999 Graded excitation with one parameter 42 | % 29/Nov./1999 Arbitrary time axis mapping 43 | % 30/May/2001 revised aperiodicity control 44 | % 08/April/2002 revised to remove magical LPF 45 | % 11/August/2002 bug fix for V/UV transition 46 | % 24/August/2002 more precise F0 control 47 | % 23/Sept./2002 minor adjustment for the length of the resynthesized signal 48 | % 05/Dec./2002 minor bug fix based on M. Tsuzaki's comment 49 | % 17/Dec./2002 bug fix in mid point selection 50 | % 10/Aug./2005 modified by Takahashi on waitbar 51 | % 10/Sept./2005 modified by Kawahara on waitbar 52 | % 27/Nov./2005 modified by Kawahara for 53 | % 21/April/2010 bug fix by Hideki Kawahara for aperiodicity 54 | % 03/July/2016 refactored for MATLAB R2016a and Octave 4.0.2 55 | 56 | %if nargin<=14; imgi=1; end; % 10/Sept./2005 57 | statusReport = 'ok';% 27/Nov./2005 58 | switch nargin % 27/Nov./2005 59 | case {1,2,3,4,5,6,7,8,9,10,11,12,13,14} 60 | imgi = 1; % default: progress display on 61 | lowestF0 = 50; % default lower F0 limit (Hz) 62 | case {15} 63 | lowestF0 = 50; % imgi was supplied; only lowestF0 needs its default 64 | end; 65 | f0l=f0raw; 66 | [nii,njj]=size(n2sgram); 67 | njj=min([njj,length(f0raw)]); % 18/Sep./1999 68 | f0l=f0l(1:njj); %03/Sep./1999 69 | if min(f0l(f0l>0))*pcnv < lowestF0 70 | statusReport = ['Minimum synthesized F0 exceeded the lower limit(' num2str(lowestF0) ' Hz).']; 71 | end; 72 | 73 | fftLengthForLowestF0 = 2^ceil(log2(2*round(fs/lowestF0)));% 27/Nov./2005 smallest power of two not below 2*round(fs/lowestF0) 74 | fftl=nii+nii-2; 75 | if fftl < fftLengthForLowestF0 % 27/Nov./2005 76 | niiNew = fftLengthForLowestF0/2+1; 77 | statusReport = 'The FFT length was inconsistent and replaced'; % NOTE(review): this overwrites any message stored above instead of appending to it 78 | n2sgram = interp1(0:nii-1,n2sgram,(0:niiNew-1)*(nii-1)/(niiNew-1)); 79 | ap = interp1(0:nii-1,ap,(0:niiNew-1)*(nii-1)/(niiNew-1)); 80 | fftl = fftLengthForLowestF0; 81 | nii = niiNew; 82 | end; 83 | 84 | % safeguard for ap mismatch 21/April/2010 85 | if size(ap,1) ~= size(n2sgram,1) 86 | apDouble =
zeros(size(n2sgram,1),size(ap,2)); 87 | for ik = 1:size(ap,2) 88 | apDouble(:,ik) = interp1((0:size(ap,1)-1),ap(:,ik),... 89 | (0:size(n2sgram,1)-1)/((size(n2sgram,1)-1)/(size(ap,1)-1)),'linear','extrap'); 90 | end; 91 | ap = apDouble; 92 | end; 93 | 94 | aprms=10.0.^(ap/20); % 23/Sept./1999 95 | aprm=min(1,max(0.001,aprms*1.6-0.015)); % 30/May/2001 96 | 97 | if length(fconv)==1 98 | idcv=min((0:fftl/2)/fconv+1,fftl/2+1); % frequency stretch conversion table 99 | elseif length(fconv)==nii 100 | idcv=fconv(:)'; 101 | elseif length(fconv) ~= nii 102 | idcv = 1:fftl/2+1; 103 | statusReport = [statusReport '\n' 'Frequency axix mapping function is not consistent with lowestF0.']; % NOTE(review): '\n' inside single quotes is the two characters backslash and n, not a newline 104 | end; 105 | if length(imap)>1 106 | sy=zeros(length(imap)+3*fftl,1);disp('here!!'); % NOTE(review): the disp call looks like leftover debug output - confirm 107 | else 108 | sy=zeros([round((njj*shiftm/1000*fs)*sconv+3*fftl+1),1]); 109 | imap=1:length(sy); 110 | imap=min(length(f0l),((imap-1)/fs*1000/shiftm/sconv+1)); 111 | end; 112 | imap=[imap ones(1,round(fs*0.2))*length(f0l)]; % safeguard: append round(fs*0.2) samples that map to the last frame 113 | ix=find(imap>=length(f0l), 1, 'first'); 114 | rmap=interp1(imap(1:ix),1:ix,1:length(f0l)); 115 | 116 | phs=fractpitch2(fftl); % phs will have smooth phase function for unit delay 117 | 118 | fftl2=fftl/2; 119 | nsyn=length(sy); 120 | idx=1; 121 | bb=1:fftl; 122 | rbb2=fftl/2:-1:2; 123 | 124 | %------- parameters for noise-based apf design 125 | t=((1:fftl)-fftl/2-1)/fftl*2; 126 | adjd=1.0./(1+exp(-20*t)); % correction function for smooth transition at fs/2 127 | gw=exp(-0.25*pi*(fs*(t/2)/gdbw).^2); % slope definition function 128 | gw=gw/sum(gw); % gdbw is the equivalent rectangular bandwidth 129 | fgw=real(fft(fftshift(gw))); % gw is the spectral smoothing window 130 | df=fs/fftl*2*pi; % normalization constant for integration and differentiation 131 | fw=(1:fftl2+1)/fftl*fs; % frequency axis 132 | 133 | trbw=300; % width of transition area 134 | rho=1.0./(1+exp(-(fw-cornf)/trbw)); % random group delay weighting function 135 | 136 | %--------- frozen group delay component
calculation ------ 137 | nz=randn(1,fftl2+1).*rho; % This is not effective. Left for randn status. 138 | %--------- 139 | lft=1-hanning(fftl)+nz(1)*0; % +nz(1)*0 is a dummy term 140 | lft=1.0./(1+exp(-(lft-0.5)*60)); 141 | ww=1.0./(1+exp(-(hanning(fftl)-0.3)*23)); % lifter for 142 | 143 | iin=1; 144 | if imgi==1; hpg=waitbar(0,'voiced part synthesis'); end; % 10/Aug./2005 145 | icntr=0; 146 | dmx=max(max(n2sgram)); 147 | while (idx < nsyn-fftl-10) && (ceil(iin)0) && (f0l(round(ii))>0) 169 | if f0l(round((ii+tii)/2))>0 % fix by H.K. on 17/Dec./2002 170 | f0=max(lowestF0/pcnv,f0l(round((ii+tii)/2))); % mid point 171 | else 172 | f0=f0l(round(ii)); 173 | end; 174 | f0=f0*pcnv; 175 | end; 176 | 177 | %- -------- 178 | ff=[n2sgram(round(idcv(:)),round(ii)); ... 179 | n2sgram(round(idcv(rbb2)),round(ii))]; 180 | ccp=real(fft(log(ff+dmx/1000000))); % 24 Sept. 1999 10000 -> 1000000 181 | ccp2=[ccp(1);2*ccp(2:fftl/2);0*ccp(fftl/2+1:fftl)]; 182 | ffx=(fft(ccp2.*lft)/fftl); 183 | nidx=round(idx); 184 | 185 | nf0=fs/f0; 186 | frt=idx-nidx; 187 | frtz=exp(1i*phs*frt)'; % This was in a wrong sign! 188 | 189 | nz=randn(1,fftl2+1).*rho; %((rho*0+1)*mixNlow(round(ii))+(1-mixNlow(round(ii)))*rho); 190 | nz=real(ifft(fft([nz,nz(rbb2)]).*fgw)); 191 | nz=nz*sqrt(fftl*gdbw/fs); % correction factor for noise 192 | if delfracind, delsp=delfrac*1000/f0; end; 193 | nz=nz*delsp*df/1000; 194 | mz=cumsum([nz(1:fftl2+1),nz(rbb2)])-nz(1); 195 | mmz=-(mz-adjd*(rem((mz(fftl)+mz(2)),2*pi)-2*pi)); 196 | pzr=exp(-1i*mmz)'; %.*[wlcut wlcut(rbb2)]'; % set ineffective 01/June/2001 197 | 198 | pz=pzr; % This makes the random group delay effective 199 | wnz=aprm(round(idcv(:)),round(ii)); % 06/May/2001 This is correct!
200 | wpr=sqrt(max(0,1-wnz.*wnz)); % 23/Sept./1999 201 | 202 | rx=randn(round(nf0),1); 203 | %----------- temporal envelope control of the aperiodic component --- 204 | zt0=nf0/fs+rx(1)*0; % +rx(1)*0 is a dummy 205 | ztc=0.01; % time constant 10ms (for example) 206 | ztp=((1:round(nf0))'-1)/fs; 207 | nev=sqrt(2*zt0/ztc/(1-exp(-2*zt0/ztc)))*exp(-ztp/ztc); 208 | rx=randn(round(nf0),1); % fresh noise; replaces the rx drawn above 209 | wfv=fft((rx-mean(rx)).*nev,fftl); % DC component removal 8/April/2002 210 | %-------------------------------------------------------------------- 211 | 212 | ep=0*real(ffx); 213 | nf0n=round(nf0); 214 | gh=hanning(nf0n*2); 215 | ep(1:nf0n)=gh(nf0n:-1:1); 216 | ep(end:-1:end-nf0n+2)=ep(2:nf0n); % bug fix on 29/Jan./2003 217 | ep=-ep/sum(ep); 218 | ep(1)=ep(1)+1; 219 | epf=fft(ep); 220 | tx=fftshift(real(ifft(epf.*exp(ffx).*pz.*frtz.*[wpr;wpr(rbb2)]))).*ww; % 8/April/2002 221 | tx2=fftshift(real(ifft(exp(ffx).*frtz.*[wnz;wnz(rbb2)].*wfv))).*ww; % 31/May/2001 222 | sy(bb+nidx)=sy(bb+nidx)+(tx*sqrt(nf0)+tx2)*(f0raw(round(ii))>0); % 02/ Sept./1999 the contribution is added only where f0raw(round(ii)) > 0 223 | idx=idx+nf0; 224 | iin=min(max(1,round(imap(round(idx)))),min(njj,length(f0raw))); % modification on 5/Dec/2002 based on comments by M. Tsuzaki 225 | if (f0raw(round(ii))==0) && (f0raw(round(iin))>0) % (mixVlow(round(ii))<0.8) & (mixVlow(round(iin))>0.8) 226 | idxo=idx; 227 | ipos=find(f0raw(round(ii:iin))>0, 1, 'first')-1+ii; 228 | if isempty(ipos) 229 | idx=idxo; 230 | else 231 | idx=max(idxo-nf0+1,rmap(round(ipos))); % 11/August/2002 (Was -1 mistake??) 232 | end; 233 | end; 234 | end; 235 | if imgi==1; close(hpg); end; % 10/Aug./2005 236 | ii=1; 237 | idx=1; 238 | f0=1000; 239 | 240 | if imgi==1; hpg=waitbar(0,'unvoiced part synthesis'); end; % 10/Aug./2005 241 | icntr=0; 242 | while (idx < nsyn-fftl) && (ii0.03 247 | ff=[n2sgram(round(idcv(:)),ii);n2sgram(round(idcv(rbb2)),ii)]; 248 | ccp=real(fft(log(ff+dmx/100000))); % 23rd July, 1999 % 24th Sept.
249 | ccp2=[ccp(1);2*ccp(2:fftl/2);0*ccp(fftl/2+1:fftl)]; 250 | ffx=(fft(ccp2.*lft)/fftl); 251 | nf0=fs/f0; 252 | tx=fftshift(real(ifft(exp(ffx)))); 253 | rx=randn([round(nf0),1]); 254 | tnx=fftfilt(rx-mean(rx),tx); % DC component removal 8/April/2002 255 | sy(bb+nidx)=sy(bb+nidx)+tnx(bb).*ww; 256 | end; 257 | idx=idx+nf0; 258 | ii=round(imap(round(idx))); 259 | end; 260 | if imgi==1; close(hpg); end; % 10/Aug./2005 261 | sy2=sy(fftl/2+(1:ix)); % skip the first fftl/2 samples and keep the next ix samples 262 | sy=sy2; 263 | switch nargout 264 | case {1} 265 | case {2} 266 | synthSataus = statusReport; % the status message is assigned only when a second output is requested 267 | end; 268 | end 269 | -------------------------------------------------------------------------------- /src/straightpanel98.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HidekiKawahara/legacy_STRAIGHT/964684981fe12cd232c5e882259dff126b3af0f2/src/straightpanel98.mat -------------------------------------------------------------------------------- /src/straightsound.m: -------------------------------------------------------------------------------- 1 | function ok=straightsound(x,fs) 2 | % Play x/32768 with soundsc, upsampling with interp first for the listed sampling rates to reduce aliasing 3 | % Requested by Dr.
Uematsu of NTT, 02/02/1998 4 | 5 | switch fs 6 | case 8000 7 | soundsc(interp(x/32768,4),fs*4); % 8000-12000 Hz inputs: interpolate by 4 and play at fs*4 8 | case 10000 9 | soundsc(interp(x/32768,4),fs*4); 10 | case 11025 11 | soundsc(interp(x/32768,4),fs*4); 12 | case 12000 13 | soundsc(interp(x/32768,4),fs*4); 14 | case 16000 15 | soundsc(interp(x/32768,2),fs*2); % 16000-24000 Hz inputs: interpolate by 2 and play at fs*2 16 | case 20000 17 | soundsc(interp(x/32768,2),fs*2); 18 | case 22050 19 | soundsc(interp(x/32768,2),fs*2); 20 | case 24000 21 | soundsc(interp(x/32768,2),fs*2); 22 | otherwise, 23 | soundsc(x/32768,fs); % any other rate is played without interpolation 24 | end 25 | ok='ok'; % the return value is always the string 'ok' 26 | -------------------------------------------------------------------------------- /src/syncgui.m: -------------------------------------------------------------------------------- 1 | function oki=syncgui() 2 | 3 | % synchronize GUI and internal values 4 | 5 | global n2sgram nsgram n3sgram n2sgrambk n3sgramE xold x f0floor f0ceil fs framem shiftm f0shiftm ... 6 | fftl eta pc framel fftl2 acth pwth pcnv fconv sconv delsp gdbw cornf fname ofname delfracind ... 7 | tpath cpath paraminitialized mag delfrac hr f0raw f0l f0var f0varL sy pcorr pecorr ... 8 | upsampleon gobjlist hhb defaultendian indefaultendian outdefaultendian 9 | 10 | framel=round(framem*fs/1000); 11 | if fftl