├── .gitignore ├── test ├── main.cpp └── org │ └── stand │ ├── AutoTest.cpp │ ├── AutoTest.h │ ├── SimpleScoreTest.h │ ├── utau │ └── UtauOtoHashTest.h │ └── util │ └── MusicalNoteTest.h ├── readme.md ├── SimpleSynthesizerApp.pro ├── SimpleSynthesizer.pro ├── core ├── org │ └── stand │ │ ├── util │ │ ├── MusicalNote.cpp │ │ └── MusicalNote.h │ │ ├── world │ │ ├── WaveUtil.h │ │ ├── Signal.h │ │ ├── Signal.cpp │ │ ├── World.h │ │ ├── WaveUtil.cpp │ │ ├── Specgrams.h │ │ ├── World.cpp │ │ └── Specgrams.cpp │ │ ├── SimpleScore.h │ │ ├── SimpleNote.h │ │ ├── SimpleSynthesizer.h │ │ ├── utau │ │ ├── UtauOtoHash.h │ │ ├── UtauPhoneme.h │ │ ├── UtauPhoneme.cpp │ │ └── UtauOtoHash.cpp │ │ ├── SimpleScore.cpp │ │ ├── SimpleCorpus.h │ │ ├── SimpleNote.cpp │ │ ├── SimpleCorpus.cpp │ │ └── SimpleSynthesizer.cpp └── world │ ├── constant_numbers.h │ ├── fft.h │ ├── synthesis.h │ ├── star.h │ ├── platinum.h │ ├── tandem_ap.h │ ├── synthesis_ap.h │ ├── common.h │ ├── dio.h │ ├── synthesis.cpp │ ├── common.cpp │ ├── matlabfunctions.h │ ├── star.cpp │ ├── synthesis_ap.cpp │ ├── platinum.cpp │ ├── matlabfunctions.cpp │ ├── tandem_ap.cpp │ └── dio.cpp ├── SimpleSynthesizerTest.pro ├── app └── main.cpp ├── SimpleSynthesizerCore.pro ├── SimpleSynthesizerApp.pro.user └── SimpleSynthesizer.pro.user /.gitignore: -------------------------------------------------------------------------------- 1 | *.user 2 | -------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "org/stand/AutoTest.h" 2 | 3 | using namespace org::stand; 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | return QAutoTest::run(argc, argv); 8 | } 9 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | SimpleSynthesizer 2 | --- 3 | SimpleSynthesizer is a simple singing 
synthesis for UTAU voicebanks with WORLD. 4 | 5 | 6 | 7 | License 8 | --- 9 | 10 | BSD 11 | 12 | Requirement 13 | --- 14 | 15 | Qt 5.2.0 MinGW 16 | -------------------------------------------------------------------------------- /SimpleSynthesizerApp.pro: -------------------------------------------------------------------------------- 1 | include(SimpleSynthesizerCore.pro) 2 | 3 | CONFIG += console 4 | 5 | TEMPLATE = app 6 | 7 | HEADERS += 8 | 9 | SOURCES += \ 10 | app/main.cpp 11 | 12 | INCLUDEPATH += \ 13 | core \ 14 | app 15 | 16 | QMAKE_CXXFLAGS += -Wall -std=c++11 17 | -------------------------------------------------------------------------------- /SimpleSynthesizer.pro: -------------------------------------------------------------------------------- 1 | TEMPLATE = subdirs 2 | 3 | SUBDIRS = \ 4 | SimpleSynthesizerCore.pro \ 5 | SimpleSynthesizerApp.pro \ 6 | SimpleSynthesizerTest.pro 7 | 8 | Core.subdir = SimpleSynthesizerCore.pro 9 | 10 | App.subdir = SimpleSynthesizerApp.pro 11 | App.depends = Core 12 | 13 | Test.subdir = SimpleSynthesizerTest.pro 14 | Test.depends = Core 15 | -------------------------------------------------------------------------------- /core/org/stand/util/MusicalNote.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "MusicalNote.h" 4 | 5 | using namespace org::stand::util; 6 | 7 | double MusicalNote::frequencyFromNote(double note, double A4Frequency) 8 | { 9 | return pow(2.0, (note - A4Note) / 12.0) * A4Frequency; 10 | } 11 | 12 | double MusicalNote::noteFromFrequency(double frequency, double A4Frequency) 13 | { 14 | return A4Note + 12.0 * log2(frequency / A4Frequency); 15 | } 16 | -------------------------------------------------------------------------------- /test/org/stand/AutoTest.cpp: -------------------------------------------------------------------------------- 1 | #include "AutoTest.h" 2 | 3 | using namespace org::stand; 4 | 5 | QHash &QAutoTest::tests() 6 | { 
7 | static QHash t; 8 | return t; 9 | } 10 | 11 | int QAutoTest::run(int argc, char *argv[]) 12 | { 13 | int ret = 0; 14 | foreach (QObject* test, tests().values()) 15 | { 16 | ret |= QTest::qExec(test, argc, argv); 17 | } 18 | return ret; 19 | } 20 | -------------------------------------------------------------------------------- /core/org/stand/world/WaveUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef WAVEUTIL_H 2 | #define WAVEUTIL_H 3 | 4 | namespace org 5 | { 6 | namespace stand 7 | { 8 | namespace world 9 | { 10 | 11 | class Signal; 12 | 13 | /** 14 | * @brief The WaveUtil class is a utility class for wave file. 15 | */ 16 | class WaveUtil 17 | { 18 | public: 19 | static Signal *read(const char *filepath); 20 | static void write(const char *filepath, const Signal *signal); 21 | }; 22 | 23 | } 24 | } 25 | } 26 | 27 | #endif // WAVEUTIL_H 28 | -------------------------------------------------------------------------------- /core/org/stand/util/MusicalNote.h: -------------------------------------------------------------------------------- 1 | #ifndef MUSICALNOTE_H 2 | #define MUSICALNOTE_H 3 | 4 | namespace org 5 | { 6 | namespace stand 7 | { 8 | namespace util 9 | { 10 | 11 | namespace MusicalNote 12 | { 13 | const static double A4Frequency = 440.0; 14 | const static double A4Note = 69.0; 15 | 16 | double noteFromFrequency(double frequency, double A4Frequency = A4Frequency); 17 | 18 | double frequencyFromNote(double note, double A4Frequency = A4Frequency); 19 | 20 | } 21 | 22 | } 23 | } 24 | } 25 | 26 | #endif // MUSICALNOTE_H 27 | -------------------------------------------------------------------------------- /SimpleSynthesizerTest.pro: -------------------------------------------------------------------------------- 1 | include(SimpleSynthesizerCore.pro) 2 | 3 | QT += testlib core 4 | QT -= gui 5 | 6 | CONFIG += console 7 | CONFIG += test 8 | 9 | TEMPLATE = app 10 | 11 | HEADERS += \ 12 | 
test/org/stand/AutoTest.h \ 13 | test/org/stand/utau/UtauOtoHashTest.h \ 14 | test/org/stand/SimpleScoreTest.h \ 15 | test/org/stand/util/MusicalNoteTest.h 16 | 17 | SOURCES += \ 18 | test/main.cpp \ 19 | test/org/stand/AutoTest.cpp 20 | 21 | INCLUDEPATH += \ 22 | core \ 23 | test 24 | 25 | QMAKE_CXXFLAGS += -Wall -std=c++11 26 | -------------------------------------------------------------------------------- /core/org/stand/world/Signal.h: -------------------------------------------------------------------------------- 1 | #ifndef SIGNAL_H 2 | #define SIGNAL_H 3 | 4 | namespace org 5 | { 6 | namespace stand 7 | { 8 | namespace world 9 | { 10 | 11 | /** 12 | * @brief The Signal class represents a vocal signal synthesized by WORLD. 13 | * It owns the sample buffer wave and frees it in the destructor, so copying is disabled. 14 | */ 15 | class Signal 16 | { 17 | public: 18 | Signal(int length, int samplingFrequency); 19 | virtual ~Signal(); 20 | 21 | void clear(); 22 | 23 | double *wave; 24 | int length; 25 | int samplingFrequency; 26 | 27 | private: 28 | /* A member-wise copy would leave two objects calling delete[] on the same buffer. */ 29 | Signal(const Signal &other) = delete; 30 | Signal &operator =(const Signal &other) = delete; 31 | }; 32 | 33 | } 34 | } 35 | } 36 | 37 | #endif // SIGNAL_H 38 | -------------------------------------------------------------------------------- /core/org/stand/world/Signal.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Signal.h" 4 | 5 | using namespace org::stand::world; 6 | 7 | /** 8 | * @brief Allocates a zero-filled buffer of length samples; a negative length is treated as 0. 9 | */ 10 | Signal::Signal(int length, int samplingFrequency) : 11 | wave(new double[length < 0 ? 0 : length]()), 12 | length(length < 0 ? 0 : length), 13 | samplingFrequency(samplingFrequency) 14 | { 15 | } 16 | 17 | /** 18 | * @brief Releases the sample buffer. 19 | */ 20 | Signal::~Signal() 21 | { 22 | delete[] wave; 23 | } 24 | 25 | /** 26 | * @brief Sets every sample of wave to 0.0; does nothing when there is no buffer or no samples. 27 | */ 28 | void Signal::clear() 29 | { 30 | if(!wave || length <= 0) 31 | { 32 | return; 33 | } 34 | 35 | for(int i = 0; i < length; i++) 36 | { 37 | wave[i] = 0.0; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /core/org/stand/SimpleScore.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLESCORE_H 2 | #define SIMPLESCORE_H 3 | 
4 | #include 5 | #include 6 | #include "SimpleNote.h" 7 | 8 | namespace org 9 | { 10 | namespace stand 11 | { 12 | 13 | typedef QList SimpleScore; 14 | 15 | class SimpleScoreFactory 16 | { 17 | public: 18 | explicit SimpleScoreFactory(QTextCodec *codec = QTextCodec::codecForName("Shift-JIS")); 19 | virtual ~SimpleScoreFactory(){ } 20 | 21 | virtual SimpleScore read(const QFileInfo &fileinfo) const; 22 | virtual SimpleScore parse(const QList &lines) const; 23 | 24 | private: 25 | QTextCodec *textCodec; 26 | }; 27 | 28 | } 29 | } 30 | 31 | #endif // SIMPLESCORE_H 32 | -------------------------------------------------------------------------------- /test/org/stand/AutoTest.h: -------------------------------------------------------------------------------- 1 | #ifndef AUTOTEST_H 2 | #define AUTOTEST_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace org 10 | { 11 | namespace stand 12 | { 13 | 14 | namespace QAutoTest 15 | { 16 | QHash &tests(); 17 | 18 | int run(int argc, char *argv[]); 19 | } 20 | 21 | template class Test 22 | { 23 | public: 24 | QSharedPointer test; 25 | Test(const QString &name) : test(new T) 26 | { 27 | if(!QAutoTest::tests().contains(name)) 28 | { 29 | QAutoTest::tests()[name] = test.data(); 30 | } 31 | } 32 | }; 33 | 34 | } 35 | } 36 | 37 | #define DECLARE_TEST(className) static org::stand::Test t(#className); 38 | 39 | #endif // AUTOTEST_H 40 | -------------------------------------------------------------------------------- /core/org/stand/world/World.h: -------------------------------------------------------------------------------- 1 | #ifndef WORLD_H 2 | #define WORLD_H 3 | 4 | namespace org 5 | { 6 | namespace stand 7 | { 8 | namespace world 9 | { 10 | 11 | class Specgrams; 12 | class Signal; 13 | 14 | class AnalysisAndSynthesis 15 | { 16 | public: 17 | virtual ~AnalysisAndSynthesis(){ } 18 | virtual Specgrams *analyze(Signal *wave, double msFramePeriod) = 0; 19 | virtual Signal *synthesize(Specgrams *spectrograms, 
int samplingFrequency) = 0; 20 | }; 21 | 22 | /** 23 | * @brief The World class implements AnalysisAndSynthesis. 24 | * This class represents WORLD. 25 | */ 26 | class World : public AnalysisAndSynthesis 27 | { 28 | public: 29 | Specgrams *analyze(Signal *wave, double msFramePeriod); 30 | Signal *synthesize(Specgrams *spectrograms, int samplingFrequency); 31 | }; 32 | 33 | } 34 | } 35 | } 36 | 37 | #endif // WORLD_H 38 | -------------------------------------------------------------------------------- /core/org/stand/world/WaveUtil.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "world/matlabfunctions.h" 3 | 4 | #include "Signal.h" 5 | 6 | #include "WaveUtil.h" 7 | 8 | using namespace org::stand::world; 9 | 10 | /** 11 | * @brief Reads a wave file through wavread() and copies its samples into a new Signal. 12 | * @param filepath path handed to wavread(). 13 | * @return a Signal allocated with new (the caller must delete it), or NULL when wavread() returns no buffer or a non-positive length. 14 | */ 15 | Signal *WaveUtil::read(const char *filepath) 16 | { 17 | int fs = 0, bit = 0, length = 0; 18 | double *wave = wavread(filepath, &fs, &bit, &length); 19 | if(!wave || length <= 0) 20 | { 21 | /* Free a buffer that came back without usable samples; delete[] on a null pointer is a no-op. */ 22 | delete[] wave; 23 | return NULL; 24 | } 25 | 26 | /* new reports failure by throwing, so the result needs no NULL check. */ 27 | Signal *signal = new Signal(length, fs); 28 | for(int i = 0; i < length; i++) 29 | { 30 | signal->wave[i] = wave[i]; 31 | } 32 | 33 | delete[] wave; 34 | return signal; 35 | } 36 | 37 | /** 38 | * @brief Writes signal to filepath through wavwrite(), passing 16 as its fourth argument (presumably bits per sample - TODO confirm against matlabfunctions.h). 39 | * Does nothing when signal or its sample buffer is NULL. 40 | */ 41 | void WaveUtil::write(const char *filepath, const Signal *signal) 42 | { 43 | if(!signal || !signal->wave) 44 | { 45 | return; 46 | } 47 | 48 | wavwrite(signal->wave, signal->length, signal->samplingFrequency, 16, filepath); 49 | } 50 | -------------------------------------------------------------------------------- /core/org/stand/SimpleNote.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLENOTE_H 2 | #define SIMPLENOTE_H 3 | 4 | #include 5 | 6 | namespace org 7 | { 8 | namespace stand 9 | { 10 | 11 | /** 12 | * @brief The SimpleNote class represents a single musical note. 
13 | */ 14 | class SimpleNote 15 | { 16 | public: 17 | SimpleNote(const QString &pronounce, double msPosition, double msDuration, int note); 18 | SimpleNote(const SimpleNote &that); 19 | SimpleNote &operator =(const SimpleNote &other); 20 | 21 | virtual ~SimpleNote(){ } 22 | 23 | bool operator ==(const SimpleNote &other) const; 24 | bool operator <(const SimpleNote &other) const; 25 | 26 | double msEnd() const; 27 | 28 | QString pronounce; 29 | double msPosition; 30 | double msDuration; 31 | int note; 32 | 33 | static bool canParseLine(const QString &line); 34 | static SimpleNote parseLine(const QString &line); 35 | }; 36 | 37 | } 38 | } 39 | 40 | #endif // SIMPLENOTE_H 41 | -------------------------------------------------------------------------------- /core/org/stand/SimpleSynthesizer.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLESYNTHESIZER_H 2 | #define SIMPLESYNTHESIZER_H 3 | 4 | #include "SimpleCorpus.h" 5 | #include "SimpleScore.h" 6 | 7 | namespace org 8 | { 9 | namespace stand 10 | { 11 | namespace world 12 | { 13 | class Signal; 14 | } 15 | 16 | class SimpleCorpus; 17 | class SimpleCorpusFactory; 18 | class SimpleNote; 19 | 20 | /** 21 | * @brief The SimpleSynthesizer class synthesizes a SimpleScore into a world::Signal. 22 | */ 23 | class SimpleSynthesizer 24 | { 25 | public: 26 | /* The default argument constructs a SimpleCorpusFactory, which needs the complete type; that is why SimpleCorpus.h is included above. */ 27 | SimpleSynthesizer(const QFileInfo &otoFilepath, int samplingFrequency = 44100, const SimpleCorpusFactory &corpusFactory = SimpleCorpusFactory()); 28 | virtual ~SimpleSynthesizer(); 29 | virtual world::Signal *synthesize(const SimpleScore &score); 30 | 31 | private: 32 | world::Signal *signalForNote(const SimpleNote &note); 33 | void addNoteSignalToResult(world::Signal *result, const world::Signal *signalForNote, const SimpleNote &note); 34 | 35 | SimpleCorpus *corpus; 36 | int samplingFrequency; 37 | }; 38 | 39 | } 40 | } 41 | 42 | #endif // SIMPLESYNTHESIZER_H 43 | -------------------------------------------------------------------------------- /core/org/stand/world/Specgrams.h: -------------------------------------------------------------------------------- 1 | 
#ifndef SPECGRAMS_H 2 | #define SPECGRAMS_H 3 | 4 | namespace org 5 | { 6 | namespace stand 7 | { 8 | namespace world 9 | { 10 | 11 | /** 12 | * @brief The Specgrams class represents an analysis result of voice signal by WORLD. Instances are obtained through create(); the sizing constructor is private. NOTE(review): a destructor and a copy constructor are declared but no copy assignment operator - confirm whether assigning one Specgrams to another is safe or should be disabled. 13 | */ 14 | class Specgrams 15 | { 16 | public: 17 | virtual ~Specgrams(); 18 | Specgrams(const Specgrams &other); 19 | 20 | double msFramePeriod; /* presumably the frame period in milliseconds given to create() - TODO confirm */ 21 | int frameLength; /* number of frames; callers use it as the loop bound over f0 */ 22 | int fftSize; 23 | double *f0; /* one f0 value per frame */ 24 | double *timeAxis; /* presumably one time position per frame - TODO confirm */ 25 | double **specgram; 26 | double **residual; 27 | 28 | double msLength() const; 29 | 30 | static Specgrams *create(int frameLength, int fftSize, double msFramePeriod); 31 | private: 32 | Specgrams(int frameLength, int fftSize, double msFramePeriod); 33 | void copy(double *dst, const double *src, int length); 34 | static double **allocMatrix(int frameLength, int fftSize); 35 | }; 36 | 37 | } 38 | } 39 | } 40 | 41 | #endif // SPECGRAMS_H 42 | -------------------------------------------------------------------------------- /core/org/stand/utau/UtauOtoHash.h: -------------------------------------------------------------------------------- 1 | #ifndef UTAUOTOHASH_H 2 | #define UTAUOTOHASH_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "UtauPhoneme.h" 9 | 10 | namespace org 11 | { 12 | namespace stand 13 | { 14 | namespace utau 15 | { 16 | /** 17 | * @brief The UtauOtoHash class represents UTAU oto.ini. 
18 | */ 19 | typedef QHash UtauOtoHash; 20 | 21 | class UtauOtoHashFactory 22 | { 23 | public: 24 | explicit UtauOtoHashFactory(QTextCodec *codec = QTextCodec::codecForName("Shift-JIS")); 25 | UtauOtoHashFactory(const UtauOtoHashFactory &other); 26 | UtauOtoHashFactory &operator =(const UtauOtoHashFactory &other); 27 | 28 | virtual ~UtauOtoHashFactory(){} 29 | 30 | virtual UtauOtoHash read(const QFileInfo &fileinfo) const; 31 | virtual UtauOtoHash parse(const QList &otoList) const; 32 | protected: 33 | QTextCodec *textCodec; 34 | }; 35 | 36 | } 37 | } 38 | } 39 | 40 | #endif // UTAUOTOHASH_H 41 | -------------------------------------------------------------------------------- /core/world/constant_numbers.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // This header file only defines constant numbers used for several function. 6 | //----------------------------------------------------------------------------- 7 | 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_ 9 | #define WORLD_CONSTANT_NUMBERS_H_ 10 | 11 | namespace world { 12 | const double kPi = 3.1415926535897932384; 13 | const double kMySafeGuardMinimum = 0.000000000001; 14 | const double kFloorF0 = 71.0; 15 | const double kDefaultF0 = 150.0; 16 | const double kLog2 = 0.69314718055994529; 17 | // Maximum standard deviation not to be selected as a best f0. 18 | const double kMaximumValue = 100000.0; 19 | // Note to me (fs: 44100) 20 | // 71 Hz is the limit to maintain the FFT size at 2048. 21 | // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required. 
22 | 23 | } // namespace world 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /core/org/stand/utau/UtauPhoneme.h: -------------------------------------------------------------------------------- 1 | #ifndef UTAUPHONEME_H 2 | #define UTAUPHONEME_H 3 | 4 | #include 5 | 6 | namespace org 7 | { 8 | namespace stand 9 | { 10 | namespace utau 11 | { 12 | 13 | /** 14 | * @brief The UtauPhoneme class represents a single UTAU phoneme in oto.ini. 15 | */ 16 | class UtauPhoneme 17 | { 18 | public: 19 | UtauPhoneme(const QString &pronounce, 20 | const QString &filename, 21 | double msLeftBlank, 22 | double msFixedLength, 23 | double msRightBlank, 24 | double msPreutterance, 25 | double msOverlap); 26 | UtauPhoneme(const UtauPhoneme &that); 27 | virtual ~UtauPhoneme(){} 28 | 29 | bool operator ==(const UtauPhoneme &that) const; 30 | 31 | UtauPhoneme copy() const; 32 | 33 | QString pronounce; 34 | QString filename; 35 | 36 | double msLeftBlank; 37 | double msFixedLength; 38 | double msRightBlank; 39 | double msPreutterance; 40 | double msOverlap; 41 | }; 42 | 43 | } 44 | } 45 | } 46 | 47 | #endif // UTAUPHONEME_H 48 | -------------------------------------------------------------------------------- /core/org/stand/SimpleScore.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "SimpleNote.h" 5 | 6 | #include "SimpleScore.h" 7 | 8 | using namespace org::stand; 9 | 10 | SimpleScoreFactory::SimpleScoreFactory(QTextCodec *codec) 11 | { 12 | textCodec = codec; 13 | } 14 | 15 | SimpleScore SimpleScoreFactory::read(const QFileInfo &fileinfo) const 16 | { 17 | QFile file(fileinfo.absoluteFilePath()); 18 | if(!file.open(QFile::ReadOnly)) 19 | { 20 | return SimpleScore(); 21 | } 22 | QTextStream stream(&file); 23 | stream.setCodec(textCodec); 24 | 25 | SimpleScore score; 26 | QList lines; 27 | while(!stream.atEnd()) 28 | { 29 | QString line = 
stream.readLine(); 30 | lines.append(line); 31 | } 32 | file.close(); 33 | return parse(lines); 34 | } 35 | 36 | SimpleScore SimpleScoreFactory::parse(const QList &lines) const 37 | { 38 | SimpleScore score; 39 | foreach(const QString &line, lines) 40 | { 41 | if(SimpleNote::canParseLine(line)) 42 | { 43 | score.append(SimpleNote::parseLine(line)); 44 | } 45 | else 46 | { 47 | printf("SimpleScoreFactory could not parse line: %s\n", line.toLocal8Bit().data()); 48 | } 49 | } 50 | qSort(score); 51 | return score; 52 | } 53 | -------------------------------------------------------------------------------- /core/world/fft.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // These functions and variables are defined to use FFT as well as FFTW 6 | // Please see fft.cpp to show the detailed information 7 | //----------------------------------------------------------------------------- 8 | #ifndef WORLD_FFT_H_ 9 | #define WORLD_FFT_H_ 10 | 11 | // Commands for FFT (This is the same as FFTW) 12 | #define FFT_FORWARD 1 13 | #define FFT_BACKWARD 2 14 | #define FFT_ESTIMATE 3 15 | 16 | // Complex number for FFT 17 | typedef double fft_complex[2]; 18 | // Struct used for FFT 19 | typedef struct { 20 | int n; 21 | int sign; 22 | unsigned int flags; 23 | fft_complex *c_in; 24 | double *in; 25 | fft_complex *c_out; 26 | double *out; 27 | double *input; 28 | int *ip; 29 | double *w; 30 | } fft_plan; 31 | 32 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign, 33 | unsigned int flags); 34 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out, 35 | unsigned int flags); 36 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out, 37 | unsigned int flags); 38 | void fft_execute(fft_plan p); 
39 | void fft_destroy_plan(fft_plan p); 40 | 41 | #endif // WORLD_FFT_H_ 42 | -------------------------------------------------------------------------------- /core/org/stand/SimpleCorpus.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLECORPUS_H 2 | #define SIMPLECORPUS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "org/stand/utau/UtauOtoHash.h" 9 | #include "org/stand/world/Specgrams.h" 10 | 11 | namespace org 12 | { 13 | namespace stand 14 | { 15 | namespace world 16 | { 17 | class World; 18 | } 19 | 20 | class SimpleCorpus 21 | { 22 | public: 23 | virtual ~SimpleCorpus(); 24 | 25 | virtual const world::Specgrams *get(const QString &pronounce); 26 | virtual const utau::UtauPhoneme *phonemeInfo(const QString &pronounce) const; 27 | 28 | private: 29 | SimpleCorpus(const utau::UtauOtoHash &oto, const QDir &otoDir, double msFramePeriod, world::World *world); 30 | 31 | QHash data; 32 | world::World *world; 33 | 34 | utau::UtauOtoHash oto; 35 | QDir otoDir; 36 | double msFramePeriod; 37 | 38 | friend class SimpleCorpusFactory; 39 | }; 40 | 41 | class SimpleCorpusFactory 42 | { 43 | public: 44 | SimpleCorpusFactory(double msFramePeriod = 5.0, utau::UtauOtoHashFactory otoFactory = utau::UtauOtoHashFactory()); 45 | virtual ~SimpleCorpusFactory(){ } 46 | 47 | virtual SimpleCorpus *create(const QFileInfo &fileinfo) const; 48 | 49 | private: 50 | utau::UtauOtoHashFactory otoFactory; 51 | double msFramePeriod; 52 | }; 53 | 54 | } 55 | } 56 | 57 | #endif // SIMPLECORPUS_H 58 | -------------------------------------------------------------------------------- /core/world/synthesis.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 
3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | #ifndef WORLD_SYNTHESIS_H_ 6 | #define WORLD_SYNTHESIS_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Synthesis() synthesizes the voice from three parameters. 10 | // Input: 11 | // f0 : f0 contour 12 | // f0_length : Length of f0 13 | // spectrogram : Spectrogram (WORLD assumes spectrogram by Star()) 14 | // residual_spectrogram : Extracted spectrum of the excitation signal 15 | // fft_size : FFT size used for Star() and Platinum() 16 | // frame_period : Temporal inverval for Dio() 17 | // fs : Sampling frequency 18 | // y_length : Length of Output (allocated in advance) 19 | // Output: 20 | // y : Synthesized voice 21 | //----------------------------------------------------------------------------- 22 | void Synthesis(double *f0, int f0_length, double **spectrogram, 23 | double **residual_spectrogram, int fft_size, double frame_period, int fs, 24 | int y_length, double *y); 25 | 26 | #endif // WORLD_SYNTHESIS_H_ 27 | -------------------------------------------------------------------------------- /core/world/star.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | 6 | #ifndef WORLD_STAR_H_ 7 | #define WORLD_STAR_H_ 8 | 9 | //----------------------------------------------------------------------------- 10 | // Star() calculates the spectrogram that consists of spectral envelopes 11 | // estimated by STAR. 
12 | // Input: 13 | // x : Input signal 14 | // x_length : Length of x 15 | // fs : Sampling frequency 16 | // time_axis : Time axis 17 | // f0 : F0 contour (f0_length : Length of f0) 18 | // Output: 19 | // spectrogram : Spectrogram estimated by STAR. 20 | //----------------------------------------------------------------------------- 21 | void Star(double *x, int x_length, int fs, double *time_axis, double *f0, 22 | int f0_length, double **spectrogram); 23 | 24 | //----------------------------------------------------------------------------- 25 | // GetFFTSizeForStar() calculates the FFT size based on the sampling frequency 26 | // and the lower limit of f0 (NOTE(review): this used to say "defined in world.h", a file this tree does not contain; kFloorF0 in constant_numbers.h looks like the intended constant - confirm). 27 | // Input: 28 | // fs : Sampling frequency 29 | // Output: 30 | // FFT size 31 | //----------------------------------------------------------------------------- 32 | int GetFFTSizeForStar(int fs); 33 | 34 | #endif // WORLD_STAR_H_ 35 | -------------------------------------------------------------------------------- /core/world/platinum.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | #ifndef WORLD_PLATINUM_H_ 6 | #define WORLD_PLATINUM_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platinum() calculates the spectrum of the excitation signal. 10 | // The excitation signal is calculated by convolving the windowed signal and 11 | // the inverse function of the spectral envelope. The minimum phase is used as the 12 | // phase of the spectral envelope. 
13 | // Input: 14 | // x : Input signal 15 | // x_length : Length of x 16 | // fs : Sampling frequency 17 | // time_axis : Temporal positions used for calculating the 18 | // excitation signal 19 | // f0 : f0 contour 20 | // spectrogram : Spectrogram (WORLD assumes spectrogram by Star()) 21 | // Output: 22 | // residual_spectrogram : Extracted spectrum of the excitation signal 23 | //----------------------------------------------------------------------------- 24 | void Platinum(double *x, int x_length, int fs, double *time_axis, double *f0, 25 | int f0_length, double **spectrogram, int fft_size, 26 | double **residual_spectrogram); 27 | #endif // WORLD_PLATINUM_H_ 28 | -------------------------------------------------------------------------------- /core/org/stand/utau/UtauPhoneme.cpp: -------------------------------------------------------------------------------- 1 | #include "UtauPhoneme.h" 2 | 3 | using namespace org::stand::utau; 4 | 5 | UtauPhoneme::UtauPhoneme(const QString &pronounce, 6 | const QString &filename, 7 | double msLeftBlank, 8 | double msFixedLength, 9 | double msRightBlank, 10 | double msPreutterance, 11 | double msOverlap) : 12 | pronounce(pronounce), 13 | filename(filename), 14 | msLeftBlank(msLeftBlank), 15 | msFixedLength(msFixedLength), 16 | msRightBlank(msRightBlank), 17 | msPreutterance(msPreutterance), 18 | msOverlap(msOverlap) 19 | { 20 | } 21 | 22 | UtauPhoneme::UtauPhoneme(const UtauPhoneme &that) : 23 | UtauPhoneme(that.pronounce, that.filename, that.msLeftBlank, that.msFixedLength, that.msRightBlank, that.msPreutterance, that.msOverlap) 24 | { 25 | } 26 | 27 | UtauPhoneme UtauPhoneme::copy() const 28 | { 29 | return UtauPhoneme(*this); 30 | } 31 | 32 | bool UtauPhoneme::operator ==(const UtauPhoneme &other) const 33 | { 34 | return this->filename == other.filename && 35 | this->msFixedLength == other.msFixedLength && 36 | this->msLeftBlank == other.msLeftBlank && 37 | this->msOverlap == other.msOverlap && 38 | this->msPreutterance 
== other.msPreutterance && 39 | this->msRightBlank == other.msRightBlank && 40 | this->pronounce == other.pronounce; 41 | } 42 | -------------------------------------------------------------------------------- /app/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "org/stand/SimpleCorpus.h" 4 | #include "org/stand/SimpleScore.h" 5 | #include "org/stand/SimpleSynthesizer.h" 6 | #include "org/stand/world/WaveUtil.h" 7 | #include "org/stand/world/World.h" 8 | 9 | using namespace org::stand; 10 | 11 | void synthesizeScore(const char *outPath, const char *scorePath, const char *otoPath) 12 | { 13 | QFileInfo otoFileInfo(otoPath); 14 | QFileInfo scoreFileInfo(scorePath); 15 | 16 | SimpleSynthesizer synthesizer(otoFileInfo); 17 | world::Signal *signal = synthesizer.synthesize(SimpleScoreFactory().read(scoreFileInfo)); 18 | 19 | if(!signal) 20 | { 21 | printf("invalid data;\n score path = `%s`\n oto path = `%s`\n", scorePath, otoPath); 22 | return; 23 | } 24 | 25 | world::WaveUtil().write(outPath, signal); 26 | } 27 | 28 | void simpleSynthesize(const char *outPath, const char *inPath) 29 | { 30 | world::Signal *signal = world::WaveUtil().read(inPath); 31 | world::Specgrams *specgrams = world::World().analyze(signal, 5.0); 32 | for(int i = 0; i < specgrams->frameLength; i++) 33 | { 34 | specgrams->f0[i] = 440.0; 35 | } 36 | world::WaveUtil().write(outPath, world::World().synthesize(specgrams, 44100)); 37 | } 38 | 39 | int main(int argc, char *argv[]) 40 | { 41 | // simpleSynthesize("c:\\projects\\momo_out.wav", "c:\\projects\\momo.wav"); 42 | synthesizeScore("c:\\projects\\test.wav", "c:\\projects\\score.txt", "c:\\projects\\momotan\\oto.ini"); 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /core/org/stand/SimpleNote.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 
"SimpleNote.h" 4 | 5 | using namespace org::stand; 6 | 7 | SimpleNote::SimpleNote(const QString &pronounce, double position, double msDuration, int note) : 8 | pronounce(pronounce), msPosition(position), msDuration(msDuration), note(note) 9 | { 10 | } 11 | 12 | SimpleNote::SimpleNote(const SimpleNote &that) : 13 | SimpleNote(that.pronounce, that.msPosition, that.msDuration, that.note) 14 | { 15 | } 16 | 17 | SimpleNote &SimpleNote::operator =(const SimpleNote &that) 18 | { 19 | this->pronounce = that.pronounce; 20 | this->msPosition = that.msPosition; 21 | this->msDuration = that.msDuration; 22 | this->note = that.note; 23 | return (*this); 24 | } 25 | 26 | SimpleNote SimpleNote::parseLine(const QString &line) 27 | { 28 | QStringList params = line.split(","); 29 | return SimpleNote(params[0], params[1].toDouble(), params[2].toDouble(), params[3].toInt()); 30 | } 31 | 32 | bool SimpleNote::canParseLine(const QString &line) 33 | { 34 | return line.split(",").size() == 4; 35 | } 36 | 37 | double SimpleNote::msEnd() const 38 | { 39 | return msPosition + msDuration; 40 | } 41 | 42 | bool SimpleNote::operator ==(const SimpleNote &other) const 43 | { 44 | return this->msDuration == other.msDuration && 45 | this->msPosition == other.msPosition && 46 | this->note == other.note && 47 | this->pronounce == other.pronounce; 48 | } 49 | 50 | bool SimpleNote::operator <(const SimpleNote &other) const 51 | { 52 | return this->msEnd() < other.msEnd(); 53 | } 54 | -------------------------------------------------------------------------------- /core/world/tandem_ap.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 
3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | #ifndef WORLD_TANDEM_AP_H_ 6 | #define WORLD_TANDEM_AP_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // GetNumberOfBands() calculate the number of bands for aperiodicity. 10 | // Input: 11 | // fs : Sampling frequency 12 | // Output: 13 | // The number of bands required for the calculation 14 | //----------------------------------------------------------------------------- 15 | int GetNumberOfBands(int fs); 16 | 17 | //----------------------------------------------------------------------------- 18 | // The latest version of aperiodicity estimation in TANDEM-STRAIGHT. 19 | // This function skipped several complex processes. 20 | // Input: 21 | // x : Input signal 22 | // x_length : Length of x 23 | // f0 : f0 contour 24 | // f0_length : Length of f0 25 | // frame_period : Time interval for analysis 26 | // Output: 27 | // aperiodicity : Estimated aperiodicity 28 | // Value used for the aperiodicity estimation. This value is used for 29 | // the synthesis. 30 | //----------------------------------------------------------------------------- 31 | double AperiodicityRatio(double *x, int x_length, int fs, double *f0, 32 | int f0_length, double frame_period, double **aperiodicity); 33 | 34 | #endif // WORLD_TANDEM_AP_H_ 35 | -------------------------------------------------------------------------------- /core/world/synthesis_ap.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 
3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | #ifndef WORLD_SYNTHESIS_AP_H_ 6 | #define WORLD_SYNTHESIS_AP_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // synthesis_ap() synthesize the voice based on f0, spectrogram and 10 | // aperiodicity (not excitation signal). 11 | // Input: 12 | // f0 : f0 contour 13 | // f0_length : Length of f0 14 | // spectrogram : Spectrogram estimated by STAR 15 | // fft_size : FFT size 16 | // aperiodicity : Aperiodicity spectrogram based on TANDEM_AP 17 | // number_of_bands : Number of frequency bands used for TANDEM_AP 18 | // target_f0 : Only a parameter in TANDEM_AP 19 | // frame_period : Temporal period used for the analysis 20 | // fs : Sampling frequency 21 | // y_length : Length of the output signal (Memory of y has been 22 | // allocated in advance) 23 | // Output: 24 | // y : Calculated glottal pulse 25 | //----------------------------------------------------------------------------- 26 | void SynthesisFromAperiodicity(double *f0, int f0_length, double **spectrogram, 27 | int fft_size, double **aperiodicity, int number_of_bands, double target_f0, 28 | double frame_period, int fs, int y_length, double *synthesisOut); 29 | 30 | #endif // WORLD_SYNTHESIS_AP_H_ 31 | -------------------------------------------------------------------------------- /test/org/stand/SimpleScoreTest.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLESCORETEST_H 2 | #define SIMPLESCORETEST_H 3 | 4 | #include 5 | #include 6 | 7 | #include "org/stand/AutoTest.h" 8 | #include "org/stand/SimpleScore.h" 9 | 10 | Q_DECLARE_METATYPE(org::stand::SimpleScore) 11 | 12 | namespace org 13 | { 14 | namespace stand 15 | { 16 | 17 | class SimpleScoreTest : public QObject 18 | { 19 | Q_OBJECT 20 | private: 21 | SimpleScore some(const SimpleNote ¬e) 22 | { 23 | 
SimpleScore score; 24 | score.append(note); 25 | return score; 26 | } 27 | 28 | SimpleScore none() 29 | { 30 | return SimpleScore(); 31 | } 32 | 33 | private slots: 34 | void parse_should_correctly_parse_lines_data() 35 | { 36 | QTest::addColumn("input"); 37 | QTest::addColumn("expected"); 38 | 39 | QTest::newRow(" valid1") << "pronounce,0,0,60" << some(SimpleNote("pronounce", 0, 0, 60)); 40 | QTest::newRow(" valid2") << "pronounce,1,1.5,127" << some(SimpleNote("pronounce", 1, 1.5, 127)); 41 | QTest::newRow(" valid3") << "あああああ,1,1.5,127" << some(SimpleNote("あああああ", 1, 1.5, 127)); 42 | QTest::newRow("invalid1") << "pronounce,0,60" << none(); 43 | QTest::newRow("invalid2") << "totally invalid." << none(); 44 | } 45 | 46 | void parse_should_correctly_parse_lines() 47 | { 48 | QFETCH(QString, input); 49 | QFETCH(SimpleScore, expected); 50 | QStringList ins; 51 | ins.append(input); 52 | 53 | SimpleScoreFactory factory(QTextCodec::codecForName("utf-8")); 54 | QCOMPARE(factory.parse(ins), expected); 55 | } 56 | }; 57 | } 58 | } 59 | 60 | DECLARE_TEST(org::stand::SimpleScoreTest) 61 | 62 | #endif // SIMPLESCORETEST_H 63 | -------------------------------------------------------------------------------- /SimpleSynthesizerCore.pro: -------------------------------------------------------------------------------- 1 | QT -= gui 2 | 3 | CONFIG += console 4 | CONFIG -= app_bundle 5 | 6 | TEMPLATE = lib 7 | 8 | HEADERS += \ 9 | core/org/stand/utau/UtauPhoneme.h \ 10 | core/org/stand/utau/UtauOtoHash.h \ 11 | core/world/common.h \ 12 | core/world/constant_numbers.h \ 13 | core/world/dio.h \ 14 | core/world/fft.h \ 15 | core/world/matlabfunctions.h \ 16 | core/world/platinum.h \ 17 | core/world/star.h \ 18 | core/world/synthesis.h \ 19 | core/world/synthesis_ap.h \ 20 | core/world/tandem_ap.h \ 21 | core/org/stand/world/World.h \ 22 | core/org/stand/world/Specgrams.h \ 23 | core/org/stand/world/Signal.h \ 24 | core/org/stand/world/WaveUtil.h \ 25 | core/org/stand/SimpleNote.h \ 26 | 
core/org/stand/SimpleScore.h \ 27 | core/org/stand/util/MusicalNote.h \ 28 | core/org/stand/SimpleCorpus.h \ 29 | core/org/stand/SimpleSynthesizer.h 30 | 31 | SOURCES += \ 32 | core/org/stand/utau/UtauPhoneme.cpp \ 33 | core/org/stand/utau/UtauOtoHash.cpp \ 34 | core/world/common.cpp \ 35 | core/world/dio.cpp \ 36 | core/world/fft.cpp \ 37 | core/world/matlabfunctions.cpp \ 38 | core/world/platinum.cpp \ 39 | core/world/star.cpp \ 40 | core/world/synthesis.cpp \ 41 | core/world/synthesis_ap.cpp \ 42 | core/world/tandem_ap.cpp \ 43 | core/org/stand/world/World.cpp \ 44 | core/org/stand/world/Specgrams.cpp \ 45 | core/org/stand/world/Signal.cpp \ 46 | core/org/stand/world/WaveUtil.cpp \ 47 | core/org/stand/SimpleNote.cpp \ 48 | core/org/stand/SimpleScore.cpp \ 49 | core/org/stand/util/MusicalNote.cpp \ 50 | core/org/stand/SimpleCorpus.cpp \ 51 | core/org/stand/SimpleSynthesizer.cpp 52 | 53 | INCLUDEPATH += \ 54 | core 55 | 56 | QMAKE_CXXFLAGS += -Wall -std=c++11 57 | -------------------------------------------------------------------------------- /core/org/stand/SimpleCorpus.cpp: -------------------------------------------------------------------------------- 1 | #include "org/stand/utau/UtauPhoneme.h" 2 | #include "org/stand/world/WaveUtil.h" 3 | #include "org/stand/world/World.h" 4 | 5 | #include "SimpleCorpus.h" 6 | 7 | using namespace org::stand; 8 | 9 | SimpleCorpusFactory::SimpleCorpusFactory(double msFramePeriod, utau::UtauOtoHashFactory otoFactory) : 10 | otoFactory(otoFactory), msFramePeriod(msFramePeriod) 11 | { 12 | } 13 | 14 | SimpleCorpus *SimpleCorpusFactory::create(const QFileInfo &fileinfo) const 15 | { 16 | SimpleCorpus *result = new SimpleCorpus(otoFactory.read(fileinfo), fileinfo.dir(), msFramePeriod, new world::World); 17 | return result; 18 | } 19 | 20 | SimpleCorpus::SimpleCorpus(const utau::UtauOtoHash &oto, const QDir &otoDir, double msFramePeriod, world::World *world) : 21 | oto(oto), otoDir(otoDir), msFramePeriod(msFramePeriod), world(world) 
22 | { 23 | } 24 | 25 | SimpleCorpus::~SimpleCorpus() 26 | { 27 | qDeleteAll(data); 28 | delete world; 29 | } 30 | 31 | const world::Specgrams *SimpleCorpus::get(const QString &pronounce) 32 | { 33 | if(!data.contains(pronounce)) 34 | { 35 | if(oto.contains(pronounce)) 36 | { 37 | world::WaveUtil waveUtil; 38 | QFileInfo fileinfo(otoDir.filePath(oto.find(pronounce).value().filename)); 39 | data[pronounce] = world->analyze(waveUtil.read(fileinfo.absoluteFilePath().toLocal8Bit().data()), msFramePeriod); 40 | } 41 | else 42 | { 43 | // Pronounce doesn't exists. Explicit null will be inserted. 44 | data[pronounce] = NULL; 45 | } 46 | } 47 | 48 | return data[pronounce]; 49 | } 50 | 51 | const utau::UtauPhoneme *SimpleCorpus::phonemeInfo(const QString &pronounce) const 52 | { 53 | if(!oto.contains(pronounce)) 54 | { 55 | return NULL; 56 | } 57 | return &(oto.find(pronounce).value()); 58 | } 59 | -------------------------------------------------------------------------------- /test/org/stand/utau/UtauOtoHashTest.h: -------------------------------------------------------------------------------- 1 | #ifndef UTAUOTOHASHTEST_H 2 | #define UTAUOTOHASHTEST_H 3 | 4 | #include "org/stand/AutoTest.h" 5 | #include "org/stand/utau/UtauOtoHash.h" 6 | #include "org/stand/utau/UtauPhoneme.h" 7 | 8 | #include 9 | #include 10 | 11 | Q_DECLARE_METATYPE(org::stand::utau::UtauOtoHash) 12 | 13 | namespace org 14 | { 15 | namespace stand 16 | { 17 | namespace utau 18 | { 19 | 20 | class UtauOtoHashTest : public QObject 21 | { 22 | Q_OBJECT 23 | private: 24 | UtauOtoHash some(const UtauPhoneme &phoneme) 25 | { 26 | UtauOtoHash result; 27 | result.insert(phoneme.pronounce, phoneme); 28 | return result; 29 | } 30 | UtauOtoHash none() 31 | { 32 | return UtauOtoHash(); 33 | } 34 | 35 | private slots: 36 | void parseLine_should_parse_one_line_of_utau_oto_ini_data() 37 | { 38 | QTest::addColumn("input"); 39 | QTest::addColumn("expected"); 40 | 41 | QTest::newRow(" no alias") << 
"あ.wav=,98,50,51,100,150" << some(UtauPhoneme("あ", "あ.wav", 98, 50, 51, 100, 150)); 42 | QTest::newRow("with alias") << "あ.wav=- あ,98,50,51,100,150" << some(UtauPhoneme("- あ", "あ.wav", 98, 50, 51, 100, 150)); 43 | QTest::newRow(" invalid1") << "あ.wav=,98,50,51,100" << none(); 44 | QTest::newRow(" invalid2") << "あ.wav=98,50,51,100,150" << none(); 45 | QTest::newRow(" invalid3") << "totally invalid" << none(); 46 | } 47 | 48 | void parseLine_should_parse_one_line_of_utau_oto_ini() 49 | { 50 | QFETCH(QString, input); 51 | QFETCH(UtauOtoHash, expected); 52 | QStringList ins; 53 | ins.append(input); 54 | 55 | UtauOtoHashFactory factory(QTextCodec::codecForName("UTF-8")); 56 | UtauOtoHash actual = factory.parse(ins); 57 | QCOMPARE(actual, expected); 58 | } 59 | }; 60 | 61 | } 62 | } 63 | } 64 | 65 | DECLARE_TEST(org::stand::utau::UtauOtoHashTest) 66 | 67 | #endif // UTAUOTOHASHTEST_H 68 | -------------------------------------------------------------------------------- /core/org/stand/world/World.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "world/dio.h" 4 | #include "world/star.h" 5 | #include "world/platinum.h" 6 | #include "world/synthesis.h" 7 | 8 | #include "Specgrams.h" 9 | #include "Signal.h" 10 | 11 | #include "World.h" 12 | 13 | using namespace org::stand::world; 14 | 15 | Specgrams *World::analyze(Signal *signal, double msFramePeriod) 16 | { 17 | if(!signal) 18 | { 19 | return NULL; 20 | } 21 | 22 | int frameLength = GetSamplesForDIO(signal->samplingFrequency, signal->length, msFramePeriod); 23 | int fftSize = GetFFTSizeForStar(signal->samplingFrequency); 24 | 25 | Specgrams *result = Specgrams::create(frameLength, fftSize, msFramePeriod); 26 | if(!result) 27 | { 28 | return NULL; 29 | } 30 | 31 | Dio(signal->wave, signal->length, signal->samplingFrequency, msFramePeriod, result->timeAxis, result->f0); 32 | Star(signal->wave, signal->length, signal->samplingFrequency, result->timeAxis, 
result->f0, frameLength, result->specgram); 33 | Platinum(signal->wave, signal->length, signal->samplingFrequency, result->timeAxis, result->f0, frameLength, result->specgram, fftSize, result->residual); 34 | 35 | return result; 36 | } 37 | 38 | Signal *World::synthesize(Specgrams *spectrograms, int samplingFrequency) 39 | { 40 | if(!spectrograms) 41 | { 42 | return NULL; 43 | } 44 | 45 | double secLength = spectrograms->msLength() / 1000.0; 46 | int waveLength = secLength * samplingFrequency; 47 | 48 | Signal *result = new Signal(waveLength, samplingFrequency); 49 | if(!result) 50 | { 51 | return NULL; 52 | } 53 | 54 | result->clear(); 55 | Synthesis(spectrograms->f0, 56 | spectrograms->frameLength, 57 | spectrograms->specgram, 58 | spectrograms->residual, 59 | spectrograms->fftSize, 60 | spectrograms->msFramePeriod, 61 | result->samplingFrequency, 62 | result->length, 63 | result->wave); 64 | return result; 65 | } 66 | -------------------------------------------------------------------------------- /core/org/stand/utau/UtauOtoHash.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "UtauOtoHash.h" 6 | 7 | using namespace org::stand::utau; 8 | 9 | UtauOtoHashFactory::UtauOtoHashFactory(QTextCodec *codec) 10 | : textCodec(codec) 11 | { 12 | } 13 | 14 | UtauOtoHashFactory::UtauOtoHashFactory(const UtauOtoHashFactory &other) 15 | : UtauOtoHashFactory(other.textCodec) 16 | { 17 | } 18 | 19 | UtauOtoHashFactory &UtauOtoHashFactory::operator =(const UtauOtoHashFactory &other) 20 | { 21 | this->textCodec = other.textCodec; 22 | return (*this); 23 | } 24 | 25 | UtauOtoHash UtauOtoHashFactory::read(const QFileInfo &fileinfo) const 26 | { 27 | QFile file(fileinfo.absoluteFilePath()); 28 | if(!file.open(QFile::ReadOnly)) 29 | { 30 | return UtauOtoHash(); 31 | } 32 | QTextStream stream(&file); 33 | stream.setCodec(textCodec); 34 | 35 | QList otoList; 36 | while(!stream.atEnd()) 37 | { 38 
| otoList.append(stream.readLine()); 39 | } 40 | file.close(); 41 | return parse(otoList); 42 | } 43 | 44 | UtauOtoHash UtauOtoHashFactory::parse(const QList &otoList) const 45 | { 46 | UtauOtoHash result; 47 | foreach(const QString &line, otoList) 48 | { 49 | QStringList params = line.split(","); 50 | if(params.size() != 6) 51 | { 52 | continue; 53 | } 54 | QStringList fileAndAlias = params[0].split("="); 55 | if(fileAndAlias.size() != 2) 56 | { 57 | continue; 58 | } 59 | QString filename = fileAndAlias[0]; 60 | QString pronounce = fileAndAlias[1].isEmpty() ? fileAndAlias[0].split(".")[0] : fileAndAlias[1]; 61 | double msLeftBlank = params[1].toDouble(); 62 | double msFixedLength = params[2].toDouble(); 63 | double msRightBlank = params[3].toDouble(); 64 | double msPreutterance = params[4].toDouble(); 65 | double msOverlap = params[5].toDouble(); 66 | result.insert(pronounce, UtauPhoneme(pronounce, filename, msLeftBlank, msFixedLength, msRightBlank, msPreutterance, msOverlap)); 67 | } 68 | return result; 69 | } 70 | -------------------------------------------------------------------------------- /core/org/stand/world/Specgrams.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Specgrams.h" 4 | 5 | using namespace org::stand::world; 6 | 7 | Specgrams::Specgrams(int frameLength, int fftSize, double msFramePeriod) 8 | { 9 | this->frameLength = frameLength; 10 | this->fftSize = fftSize; 11 | this->msFramePeriod = msFramePeriod; 12 | f0 = new double[frameLength]; 13 | timeAxis = new double[frameLength]; 14 | specgram = allocMatrix(frameLength, fftSize); 15 | residual = allocMatrix(frameLength, fftSize); 16 | } 17 | 18 | Specgrams::Specgrams(const Specgrams &other) 19 | { 20 | frameLength = other.frameLength; 21 | fftSize = other.fftSize; 22 | msFramePeriod = other.msFramePeriod; 23 | f0 = new double[frameLength]; 24 | timeAxis = new double[frameLength]; 25 | specgram = allocMatrix(frameLength, fftSize); 
26 | residual = allocMatrix(frameLength, fftSize); 27 | 28 | copy(f0, other.f0, frameLength); 29 | copy(timeAxis, other.timeAxis, frameLength); 30 | copy(specgram[0], other.specgram[0], fftSize * frameLength); 31 | copy(residual[0], other.residual[0], fftSize * frameLength); 32 | } 33 | 34 | Specgrams::~Specgrams() 35 | { 36 | delete[] f0; 37 | delete[] timeAxis; 38 | if(specgram) 39 | { 40 | delete[] specgram[0]; 41 | } 42 | delete[] specgram; 43 | if(residual) 44 | { 45 | delete residual[0]; 46 | } 47 | } 48 | 49 | double **Specgrams::allocMatrix(int frameLength, int fftSize) 50 | { 51 | double **result = new double*[frameLength]; 52 | result[0] = new double[frameLength * fftSize]; 53 | for(int i = 1; i < frameLength; i++) 54 | { 55 | result[i] = result[0] + i * fftSize; 56 | } 57 | return result; 58 | } 59 | 60 | void Specgrams::copy(double *dst, const double *src, int length) 61 | { 62 | for(int i = 0; i < length; i++) 63 | { 64 | dst[i] = src[i]; 65 | } 66 | } 67 | 68 | Specgrams *Specgrams::create(int frameLength, int fftSize, double msFramePeriod) 69 | { 70 | if(frameLength <= 0 || fftSize <= 0 || msFramePeriod <= 0.0) 71 | { 72 | return NULL; 73 | } 74 | return new Specgrams(frameLength, fftSize, msFramePeriod); 75 | } 76 | 77 | double Specgrams::msLength() const 78 | { 79 | return frameLength * msFramePeriod; 80 | } 81 | -------------------------------------------------------------------------------- /test/org/stand/util/MusicalNoteTest.h: -------------------------------------------------------------------------------- 1 | #ifndef MUSICALNOTETEST_H 2 | #define MUSICALNOTETEST_H 3 | 4 | #include 5 | #include 6 | 7 | #include "org/stand/AutoTest.h" 8 | #include "org/stand/util/MusicalNote.h" 9 | 10 | namespace org 11 | { 12 | namespace stand 13 | { 14 | namespace util 15 | { 16 | 17 | class MusicalNoteTest : public QObject 18 | { 19 | Q_OBJECT 20 | private slots: 21 | void frequency_from_note_should_pass_all_test_cases_data() 22 | { 23 | 
QTest::addColumn("input"); 24 | QTest::addColumn("expected"); 25 | QTest::newRow("C4") << 60.0 << 261.6255653005986; 26 | QTest::newRow("C+4") << 61.0 << 277.1826309768721; 27 | QTest::newRow("D4") << 62.0 << 293.6647679174076; 28 | QTest::newRow("D+4") << 63.0 << 311.12698372208087; 29 | QTest::newRow("E4") << 64.0 << 329.6275569128699; 30 | QTest::newRow("F4") << 65.0 << 349.2282314330039; 31 | QTest::newRow("F+4") << 66.0 << 369.9944227116344; 32 | QTest::newRow("G4") << 67.0 << 391.99543598174927; 33 | QTest::newRow("G+4") << 68.0 << 415.3046975799451; 34 | QTest::newRow("A4") << 69.0 << 440.0; 35 | QTest::newRow("A+4") << 70.0 << 466.1637615180899; 36 | QTest::newRow("B4") << 71.0 << 493.8833012561241; 37 | } 38 | 39 | void frequency_from_note_should_pass_all_test_cases() 40 | { 41 | QFETCH(double, input); 42 | QFETCH(double, expected); 43 | 44 | QCOMPARE(MusicalNote::frequencyFromNote(input), expected); 45 | } 46 | 47 | void note_from_frequency_should_pass_all_test_cases_data() 48 | { 49 | QTest::addColumn("input"); 50 | QTest::addColumn("expected"); 51 | QTest::newRow("C4") << 261.6255653005986 << 60.0; 52 | QTest::newRow("C+4") << 277.1826309768721 << 61.0; 53 | QTest::newRow("D4") << 293.6647679174076 << 62.0; 54 | QTest::newRow("D+4") << 311.12698372208087 << 63.0; 55 | QTest::newRow("E4") << 329.6275569128699 << 64.0; 56 | QTest::newRow("F4") << 349.2282314330039 << 65.0; 57 | QTest::newRow("F+4") << 369.9944227116344 << 66.0; 58 | QTest::newRow("G4") << 391.99543598174927 << 67.0; 59 | QTest::newRow("G+4") << 415.3046975799451 << 68.0; 60 | QTest::newRow("A4") << 440.0 << 69.0; 61 | QTest::newRow("A+4") << 466.1637615180899 << 70.0; 62 | QTest::newRow("B4") << 493.8833012561241 << 71.0; 63 | } 64 | 65 | void note_from_frequency_should_pass_all_test_cases() 66 | { 67 | QFETCH(double, input); 68 | QFETCH(double, expected); 69 | 70 | QCOMPARE(MusicalNote::noteFromFrequency(input), expected); 71 | } 72 | }; 73 | 74 | } 75 | } 76 | } 77 | 78 | 
DECLARE_TEST(org::stand::util::MusicalNoteTest) 79 | 80 | #endif // MUSICALNOTETEST_H 81 | -------------------------------------------------------------------------------- /core/world/common.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | #ifndef WORLD_COMMON_H_ 5 | #define WORLD_COMMON_H_ 6 | 7 | #include "./fft.h" 8 | 9 | //----------------------------------------------------------------------------- 10 | // Structs on FFT 11 | //----------------------------------------------------------------------------- 12 | // Forward FFT in the real sequence 13 | typedef struct { 14 | int fft_size; 15 | double *waveform; 16 | fft_complex *spectrum; 17 | fft_plan forward_fft; 18 | } ForwardRealFFT; 19 | 20 | // Inverse FFT in the real sequence 21 | typedef struct { 22 | int fft_size; 23 | double *waveform; 24 | fft_complex *spectrum; 25 | fft_plan inverse_fft; 26 | } InverseRealFFT; 27 | 28 | // Minimum phase analysis from logarithmic power spectrum 29 | typedef struct { 30 | int fft_size; 31 | double *log_spectrum; 32 | fft_complex *minimum_phase_spectrum; 33 | fft_complex *cepstrum; 34 | fft_plan inverse_fft; 35 | fft_plan forward_fft; 36 | } MinimumPhaseAnalysis; 37 | 38 | //----------------------------------------------------------------------------- 39 | // GetSuitableFFTSize() calculates the suitable FFT size. 40 | // The size is defined as the minimum length whose length is longer than 41 | // the input sample. 
42 | // Input: 43 | // sample : Length of the input signal 44 | // Output: 45 | // Suitable FFT size 46 | //----------------------------------------------------------------------------- 47 | int GetSuitableFFTSize(int sample); 48 | 49 | //----------------------------------------------------------------------------- 50 | // These four functions are simple max() and min() function 51 | // for "int" and "double" type. 52 | //----------------------------------------------------------------------------- 53 | inline int MyMax(int x, int y) { 54 | return x > y ? x : y; 55 | } 56 | inline double MyMax(double x, double y) { 57 | return x > y ? x : y; 58 | } 59 | inline int MyMin(int x, int y) { 60 | return x < y ? x : y; 61 | } 62 | inline double MyMin(double x, double y) { 63 | return x < y ? x : y; 64 | } 65 | 66 | //----------------------------------------------------------------------------- 67 | // These functions are used to speed up the processing. 68 | // Forward FFT 69 | void InitializeForwardRealFFT(int fft_size, ForwardRealFFT *forward_real_fft); 70 | void DestroyForwardRealFFT(ForwardRealFFT *forward_real_fft); 71 | 72 | // Inverse FFT 73 | void InitializeInverseRealFFT(int fft_size, InverseRealFFT *inverse_real_fft); 74 | void DestroyInverseRealFFT(InverseRealFFT *inverse_real_fft); 75 | 76 | // Minimum phase analysis (This analysis uses FFT) 77 | void InitializeMinimumPhaseAnalysis(int fft_size, 78 | MinimumPhaseAnalysis *minimum_phase); 79 | void GetMinimumPhaseSpectrum(MinimumPhaseAnalysis *minimum_phase); 80 | void DestroyMinimumPhaseAnalysis(MinimumPhaseAnalysis *minimum_phase); 81 | 82 | #endif // WORLD_COMMON_H_ 83 | -------------------------------------------------------------------------------- /core/world/dio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 
3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_DIO_H_ 7 | #define WORLD_DIO_H_ 8 | 9 | //----------------------------------------------------------------------------- 10 | // Struct for DIO 11 | //----------------------------------------------------------------------------- 12 | typedef struct { 13 | double f0_floor; 14 | double f0_ceil; 15 | double channels_in_octave; 16 | double frame_period; // msec 17 | int speed; // (1, 2, ..., 12) 18 | } DioOption; 19 | 20 | //----------------------------------------------------------------------------- 21 | // DIO (version 0.1.0) 22 | // You can only change the parameter "frame_period". If you want to change 23 | // other parameters, you should use the latest Dio(). 24 | // This version will be removed in the future. 25 | // Input: 26 | // x : Input signal 27 | // x_length : Length of x 28 | // fs : Sampling frequency 29 | // frame_period : Frame shift [msec] 30 | // Output: 31 | // time_axis : Temporal positions. 32 | // f0 : F0 contour. 33 | //----------------------------------------------------------------------------- 34 | void Dio(double *x, int x_length, int fs, double frame_period, 35 | double *time_axis, double *f0); 36 | 37 | //----------------------------------------------------------------------------- 38 | // DIO (version 0.1.1) 39 | // Input: 40 | // x : Input signal 41 | // x_length : Length of x 42 | // fs : Sampling frequency 43 | // option : Struct holding the parameters for DIO 44 | // Output: 45 | // time_axis : Temporal positions. 46 | // f0 : F0 contour.
47 | //----------------------------------------------------------------------------- 48 | void Dio(double *x, int x_length, int fs, const DioOption option, 49 | double *time_axis, double *f0); 50 | 51 | //----------------------------------------------------------------------------- 52 | // InitializeDioOption allocates the memory to the struct and sets the 53 | // default parameters. 54 | // Output: 55 | // option : Struct for the optional parameter. 56 | //----------------------------------------------------------------------------- 57 | void InitializeDioOption(DioOption *option); 58 | 59 | //----------------------------------------------------------------------------- 60 | // GetSamplesForDIO() calculates the number of samples required for Dio(). 61 | // Input: 62 | // fs : Sampling frequency [Hz] 63 | // x_length : Length of the input signal [Sample]. 64 | // frame_period : Frame shift [msec] 65 | // Output: 66 | // The number of samples required to store the results of Dio() 67 | //----------------------------------------------------------------------------- 68 | int GetSamplesForDIO(int fs, int x_length, double frame_period); 69 | 70 | #endif // WORLD_DIO_H_ 71 | -------------------------------------------------------------------------------- /SimpleSynthesizerApp.pro.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ProjectExplorer.Project.ActiveTarget 7 | -1 8 | 9 | 10 | ProjectExplorer.Project.EditorSettings 11 | 12 | true 13 | false 14 | true 15 | 16 | Cpp 17 | 18 | CppGlobal 19 | 20 | 21 | 22 | QmlJS 23 | 24 | QmlJSGlobal 25 | 26 | 27 | 2 28 | UTF-8 29 | false 30 | 4 31 | false 32 | true 33 | 1 34 | true 35 | 0 36 | true 37 | 0 38 | 8 39 | true 40 | 2 41 | true 42 | true 43 | true 44 | false 45 | 46 | 47 | 48 | ProjectExplorer.Project.PluginSettings 49 | 50 | 51 | 52 | ProjectExplorer.Project.TargetCount 53 | 0 54 | 55 | 56 | ProjectExplorer.Project.Updater.EnvironmentId 57 | 
{9778d425-4e50-4a61-8877-ebb50ba38180} 58 | 59 | 60 | ProjectExplorer.Project.Updater.FileVersion 61 | 15 62 | 63 | 64 | -------------------------------------------------------------------------------- /core/org/stand/SimpleSynthesizer.cpp: -------------------------------------------------------------------------------- 1 | #include "SimpleCorpus.h" 2 | #include "SimpleNote.h" 3 | #include "SimpleScore.h" 4 | 5 | #include "org/stand/world/Signal.h" 6 | #include "org/stand/world/Specgrams.h" 7 | #include "org/stand/world/World.h" 8 | 9 | #include "org/stand/util/MusicalNote.h" 10 | 11 | #include "SimpleSynthesizer.h" 12 | 13 | using namespace org::stand; 14 | 15 | // Creates the corpus for the given oto file through corpusFactory; the destructor deletes it. 16 | SimpleSynthesizer::SimpleSynthesizer(const QFileInfo &otoFilepath, int samplingFrequency, const SimpleCorpusFactory &corpusFactory) : 17 | samplingFrequency(samplingFrequency), corpus(corpusFactory.create(otoFilepath)) 18 | { 19 | } 20 | 21 | SimpleSynthesizer::~SimpleSynthesizer() 22 | { 23 | delete corpus; 24 | } 25 | // Renders every note of score into one heap-allocated Signal; returns NULL for an empty score. 26 | world::Signal *SimpleSynthesizer::synthesize(const SimpleScore &score) 27 | { 28 | if(score.isEmpty()) 29 | { 30 | return NULL; 31 | } 32 | double msLength = score.last().msEnd(); 33 | int samples = msLength * samplingFrequency / 1000.0 + 0.5; 34 | 35 | world::Signal *result = new world::Signal(samples, samplingFrequency); 36 | result->clear(); 37 | 38 | foreach(const SimpleNote &note, score) 39 | { 40 | printf("now synthesizing -> %s\n", note.pronounce.toUtf8().data()); 41 | world::Signal *signalForNote = this->signalForNote(note); 42 | // signalForNote() returns NULL when the pronounce has no usable analysis; such notes stay silent. 43 | if(signalForNote) addNoteSignalToResult(result, signalForNote, note); 44 | 45 | delete signalForNote; 46 | } 47 | return result; 48 | } 49 | // Synthesizes one note at its pitch from the stored analysis of its pronounce; NULL if that analysis is unavailable. 50 | world::Signal *SimpleSynthesizer::signalForNote(const SimpleNote &note) 51 | { 52 | const world::Specgrams *specgrams = corpus->get(note.pronounce); 53 | if(!specgrams) 54 | { 55 | return NULL; 56 | } 57 | const utau::UtauPhoneme *phoneme = corpus->phonemeInfo(note.pronounce); 58 | int frameLength = (note.msDuration +
phoneme->msPreutterance) / specgrams->msFramePeriod; 59 | world::Specgrams *buffer = world::Specgrams::create(frameLength, specgrams->fftSize, specgrams->msFramePeriod); 60 | if(!buffer) return NULL; // create() yields NULL for a non-positive frame count 61 | for(int i = 0; i < buffer->frameLength; i++) 62 | { 63 | int sourceIndex = i + phoneme->msLeftBlank / specgrams->msFramePeriod; 64 | sourceIndex = qMin(qMax(sourceIndex, 0), specgrams->frameLength - 1); // clamp to the last valid frame; frameLength itself is out of range 65 | buffer->f0[i] = util::MusicalNote::frequencyFromNote(note.note); 66 | 67 | for(int j = 0; j < buffer->fftSize; j++) 68 | { 69 | buffer->specgram[i][j] = specgrams->specgram[sourceIndex][j]; 70 | } 71 | for(int j = 0; j < buffer->fftSize; j++) 72 | { 73 | buffer->residual[i][j] = specgrams->residual[sourceIndex][j]; 74 | } 75 | } 76 | 77 | world::Signal *result = world::World().synthesize(buffer, samplingFrequency); 78 | delete buffer; 79 | 80 | return result; 81 | } 82 | // Adds the note's samples onto result, starting at the note's position and clipped to both buffers. 83 | void SimpleSynthesizer::addNoteSignalToResult(world::Signal *result, const world::Signal *signalForNote, const SimpleNote &note) 84 | { 85 | if(!result || !signalForNote) return; // signalForNote() returns NULL for pronounces it cannot render 86 | 87 | int resultBegin = qMax((int)(note.msPosition * samplingFrequency / 1000.0), 0); 88 | int resultEnd = note.msEnd() * samplingFrequency / 1000.0 + 0.5; 89 | // Stop at the note end, the end of the output buffer or the end of the note signal, whichever comes first. 90 | for(int i = resultBegin, j = 0; i < resultEnd && i < result->length && j < signalForNote->length; i++, j++) 91 | { 92 | result->wave[i] += signalForNote->wave[j]; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /core/world/synthesis.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Voice synthesis based on f0, spectrogram and spectrogram of 6 | // excitation signal.
7 | //----------------------------------------------------------------------------- 8 | #include 9 | #include 10 | #include 11 | #include "./synthesis.h" 12 | #include "./matlabfunctions.h" 13 | #include "./common.h" 14 | #include "./constant_numbers.h" 15 | 16 | namespace { 17 | 18 | //----------------------------------------------------------------------------- 19 | // GetOneFrameSegment() calculates a glottal vibration based on the spectral 20 | // envelope and excitation signal. 21 | // Caution: 22 | // minimum_phase and inverse_real_fft are allocated in advance. This is for 23 | // the rapid processing because set of FFT requires much computational cost. 24 | //----------------------------------------------------------------------------- 25 | void GetOneFrameSegment(double *f0, double **spectrogram, 26 | double **residual_spectrogram, int fft_size, int current_frame, 27 | MinimumPhaseAnalysis *minimum_phase, InverseRealFFT *inverse_real_fft, 28 | double *y) { 29 | for (int i = 0; i <= minimum_phase->fft_size / 2; ++i) 30 | minimum_phase->log_spectrum[i] = 31 | log(spectrogram[current_frame][i]) / 2.0; 32 | GetMinimumPhaseSpectrum(minimum_phase); 33 | 34 | inverse_real_fft->spectrum[0][0] = 35 | minimum_phase->minimum_phase_spectrum[0][0] * 36 | residual_spectrogram[current_frame][0]; 37 | inverse_real_fft->spectrum[0][1] = 0; 38 | 39 | for (int i = 1; i < fft_size / 2; ++i) { 40 | inverse_real_fft->spectrum[i][0] = 41 | minimum_phase->minimum_phase_spectrum[i][0] * 42 | residual_spectrogram[current_frame][(i - 1) * 2 + 1] - 43 | minimum_phase->minimum_phase_spectrum[i][1] * 44 | residual_spectrogram[current_frame][i * 2]; 45 | inverse_real_fft->spectrum[i][1] = 46 | minimum_phase->minimum_phase_spectrum[i][0] * 47 | residual_spectrogram[current_frame][i * 2] + 48 | minimum_phase->minimum_phase_spectrum[i][1] * 49 | residual_spectrogram[current_frame][(i - 1) * 2 + 1]; 50 | } 51 | inverse_real_fft->spectrum[fft_size / 2][0] = 52 | 
minimum_phase->minimum_phase_spectrum[fft_size / 2][0] * 53 | residual_spectrogram[current_frame][fft_size - 1]; 54 | inverse_real_fft->spectrum[fft_size / 2][1] = 0; 55 | fft_execute(inverse_real_fft->inverse_fft); 56 | 57 | for (int i = 0; i < fft_size; ++i) 58 | y[i] = inverse_real_fft->waveform[i] / fft_size; 59 | } 60 | 61 | } // namespace 62 | 63 | void Synthesis(double *f0, int f0_length, double **spectrogram, 64 | double **residual_spectrogram, int fft_size, double frame_period, 65 | int fs, int y_length, double *y) { 66 | double *impulse_response = new double[fft_size]; 67 | 68 | MinimumPhaseAnalysis minimum_phase = {0}; 69 | InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase); 70 | InverseRealFFT inverse_real_fft = {0}; 71 | InitializeInverseRealFFT(fft_size, &inverse_real_fft); 72 | 73 | double current_time = 0.0; 74 | int current_position = 0; 75 | int current_frame = 0; 76 | // Length used for the synthesis is unclear. 77 | const int kFrameLength = 3 * fft_size / 4; 78 | 79 | while (1) { 80 | GetOneFrameSegment(f0, spectrogram, residual_spectrogram, fft_size, 81 | current_frame, &minimum_phase, &inverse_real_fft, impulse_response); 82 | 83 | for (int i = current_position; 84 | i < MyMin(current_position + kFrameLength, y_length - 1); ++i) 85 | y[i] += impulse_response[i - current_position]; 86 | 87 | // update 88 | current_time += 89 | 1.0 / (f0[current_frame] == 0.0 ? 
world::kDefaultF0 : f0[current_frame]); 90 | current_frame = matlab_round(current_time / (frame_period / 1000.0)); 91 | current_position = static_cast(current_time * fs); 92 | if (current_frame >= f0_length) break; 93 | } 94 | 95 | DestroyMinimumPhaseAnalysis(&minimum_phase); 96 | DestroyInverseRealFFT(&inverse_real_fft); 97 | delete[] impulse_response; 98 | } 99 | -------------------------------------------------------------------------------- /core/world/common.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // common.cpp includes functions used in at least two files. 6 | // (1) Common functions 7 | // (2) FFT, IFFT and minimum phase analysis. 8 | // 9 | // In FFT analysis and minimum phase analysis, 10 | // Functions "Initialize*()" allocate the mamory. 11 | // Functions "Destroy*()" free the accolated memory. 12 | // FFT size is used for initialization, and structs are used to keep the memory. 13 | // Functions "GetMinimumPhaseSpectrum()" calculate minimum phase spectrum. 14 | // Forward and inverse FFT do not have the function "Get*()", 15 | // because forward FFT and inverse FFT can run in one step. 
16 | // 17 | //----------------------------------------------------------------------------- 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "./common.h" 23 | #include "./constant_numbers.h" 24 | 25 | //----------------------------------------------------------------------------- 26 | // Fundamental functions 27 | 28 | int GetSuitableFFTSize(int sample) { 29 | return static_cast(pow(2.0, 30 | static_cast(log(static_cast(sample)) / world::kLog2) + 1.0)); 31 | } 32 | 33 | //----------------------------------------------------------------------------- 34 | // FFT, IFFT and minimum phase analysis 35 | void InitializeForwardRealFFT(int fft_size, ForwardRealFFT *forward_real_fft) { 36 | forward_real_fft->fft_size = fft_size; 37 | forward_real_fft->waveform = new double[fft_size]; 38 | forward_real_fft->spectrum = new fft_complex[fft_size]; 39 | forward_real_fft->forward_fft = fft_plan_dft_r2c_1d(fft_size, 40 | forward_real_fft->waveform, forward_real_fft->spectrum, FFT_ESTIMATE); 41 | } 42 | 43 | void DestroyForwardRealFFT(ForwardRealFFT *forward_real_fft) { 44 | fft_destroy_plan(forward_real_fft->forward_fft); 45 | delete[] forward_real_fft->spectrum; 46 | delete[] forward_real_fft->waveform; 47 | } 48 | 49 | void InitializeInverseRealFFT(int fft_size, InverseRealFFT *inverse_real_fft) { 50 | inverse_real_fft->fft_size = fft_size; 51 | inverse_real_fft->waveform = new double[fft_size]; 52 | inverse_real_fft->spectrum = new fft_complex[fft_size]; 53 | inverse_real_fft->inverse_fft = fft_plan_dft_c2r_1d(fft_size, 54 | inverse_real_fft->spectrum, inverse_real_fft->waveform, FFT_ESTIMATE); 55 | } 56 | 57 | void DestroyInverseRealFFT(InverseRealFFT *inverse_real_fft) { 58 | fft_destroy_plan(inverse_real_fft->inverse_fft); 59 | delete[] inverse_real_fft->spectrum; 60 | delete[] inverse_real_fft->waveform; 61 | } 62 | 63 | void InitializeMinimumPhaseAnalysis(int fft_size, 64 | MinimumPhaseAnalysis *minimum_phase) { 65 | minimum_phase->fft_size = fft_size; 
66 | minimum_phase->log_spectrum = new double[fft_size]; 67 | minimum_phase->minimum_phase_spectrum = new fft_complex[fft_size]; 68 | minimum_phase->cepstrum = new fft_complex[fft_size]; 69 | minimum_phase->inverse_fft = fft_plan_dft_r2c_1d(fft_size, 70 | minimum_phase->log_spectrum, minimum_phase->cepstrum, FFT_ESTIMATE); 71 | minimum_phase->forward_fft = fft_plan_dft_1d(fft_size, 72 | minimum_phase->cepstrum, minimum_phase->minimum_phase_spectrum, 73 | FFT_FORWARD, FFT_ESTIMATE); 74 | } 75 | 76 | void GetMinimumPhaseSpectrum(MinimumPhaseAnalysis *minimum_phase) { 77 | // Mirroring 78 | for (int i = minimum_phase->fft_size / 2 + 1; 79 | i < minimum_phase->fft_size; ++i) 80 | minimum_phase->log_spectrum[i] = 81 | minimum_phase->log_spectrum[minimum_phase->fft_size - i]; 82 | 83 | // This fft_plan carries out "forward" FFT. 84 | // To carriy out the Inverse FFT, the sign of imaginary part 85 | // is inverted after FFT. 86 | fft_execute(minimum_phase->inverse_fft); 87 | minimum_phase->cepstrum[0][1] *= -1.0; 88 | for (int i = 1; i < minimum_phase->fft_size / 2; ++i) { 89 | minimum_phase->cepstrum[i][0] *= 2.0; 90 | minimum_phase->cepstrum[i][1] *= -2.0; 91 | } 92 | minimum_phase->cepstrum[minimum_phase->fft_size / 2][1] *= -1.0; 93 | for (int i = minimum_phase->fft_size / 2 + 1; 94 | i < minimum_phase->fft_size; ++i) { 95 | minimum_phase->cepstrum[i][0] = 0.0; 96 | minimum_phase->cepstrum[i][1] = 0.0; 97 | } 98 | fft_execute(minimum_phase->forward_fft); 99 | 100 | // This FFT library does not keep the aliasing. 101 | // Since x is complex number, calculation of exp(x) is as following. 
102 | double tmp; 103 | for (int i = 0; i <= minimum_phase->fft_size / 2; ++i) { 104 | tmp = exp(minimum_phase->minimum_phase_spectrum[i][0] / 105 | minimum_phase->fft_size); 106 | minimum_phase->minimum_phase_spectrum[i][0] = tmp * 107 | cos(minimum_phase->minimum_phase_spectrum[i][1] / 108 | minimum_phase->fft_size); 109 | minimum_phase->minimum_phase_spectrum[i][1] = tmp * 110 | sin(minimum_phase->minimum_phase_spectrum[i][1] / 111 | minimum_phase->fft_size); 112 | } 113 | } 114 | 115 | void DestroyMinimumPhaseAnalysis(MinimumPhaseAnalysis *minimum_phase) { 116 | fft_destroy_plan(minimum_phase->forward_fft); 117 | fft_destroy_plan(minimum_phase->inverse_fft); 118 | delete[] minimum_phase->cepstrum; 119 | delete[] minimum_phase->log_spectrum; 120 | delete[] minimum_phase->minimum_phase_spectrum; 121 | } 122 | 123 | -------------------------------------------------------------------------------- /core/world/matlabfunctions.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | //----------------------------------------------------------------------------- 5 | #ifndef WORLD_MATLABFUNCTIONS_H_ 6 | #define WORLD_MATLABFUNCTIONS_H_ 7 | 8 | #include "./common.h" 9 | 10 | //----------------------------------------------------------------------------- 11 | // histc() counts the number of values in vector x that fall between the 12 | // elements in the edges vector (which must contain monotonically 13 | // nondecreasing values). n is a length(edges) vector containing these counts. 14 | // No elements of x can be complex. 
15 | // http://www.mathworks.co.jp/help/techdoc/ref/histc.html 16 | // Input: 17 | // x : Input vector 18 | // x_length : Length of x 19 | // edges : Input matrix (1-dimension) 20 | // edges_length : Length of edges 21 | // Output: 22 | // index : Result counted in vector x 23 | // Caution: 24 | // Lengths of index and edges must be the same. 25 | //----------------------------------------------------------------------------- 26 | void histc(double *x, int x_length, double *edges, int edges_length, 27 | int *index); 28 | 29 | //----------------------------------------------------------------------------- 30 | // interp1() interpolates to find yi, the values of the underlying function Y 31 | // at the points in the vector or array xi. x must be a vector. 32 | // http://www.mathworks.co.jp/help/techdoc/ref/interp1.html 33 | // Input: 34 | // x : Input vector (Time axis) 35 | // y : Values at x[n] 36 | // x_length : Length of x (Length of y must be the same) 37 | // xi : Required vector 38 | // xi_length : Length of xi (Length of yi must be the same) 39 | // Output: 40 | // yi : Interpolated vector 41 | //----------------------------------------------------------------------------- 42 | void interp1(double *x, double *y, int x_length, double *xi, int xi_length, 43 | double *yi); 44 | 45 | //----------------------------------------------------------------------------- 46 | // decimate() carries out down sampling by both IIR and FIR filters. 47 | // Filter coefficients are based on FilterForDecimate().
48 | // Input: 49 | // x : Input signal 50 | // x_length : Length of x 51 | // r : Coefficient used for down sampling 52 | // (fs after down sampling is fs/r) 53 | // Output: 54 | // y : Output signal 55 | //----------------------------------------------------------------------------- 56 | void decimate(double *x, int x_length, int r, double *y); 57 | 58 | //----------------------------------------------------------------------------- 59 | // matlab_round() calculates rounding. 60 | // Input: 61 | // x : Input value 62 | // Output: 63 | // y : Rounded value 64 | //----------------------------------------------------------------------------- 65 | int matlab_round(double x); 66 | 67 | //----------------------------------------------------------------------------- 68 | // diff() calculates differences and approximate derivatives 69 | // http://www.mathworks.co.jp/help/techdoc/ref/diff.html 70 | // Input: 71 | // x : Input signal 72 | // x_length : Length of x 73 | // Output: 74 | // y : Output signal 75 | //----------------------------------------------------------------------------- 76 | void diff(double *x, int x_length, double *y); 77 | 78 | //----------------------------------------------------------------------------- 79 | // interp1Q() is the special case of interp1(). 80 | // We can use this function, provided that all periods of the x-axis are the same. 81 | // Input: 82 | // x : Origin of the x-axis 83 | // shift : Period of the x-axis 84 | // y : Values at x[n] 85 | // x_length : Length of x (Length of y must be the same) 86 | // xi : Required vector 87 | // xi_length : Length of xi (Length of yi must be the same) 88 | // Output: 89 | // yi : Interpolated vector 90 | // Caution: 91 | // Length of xi and yi must be the same.
92 | //----------------------------------------------------------------------------- 93 | void interp1Q(double x, double shift, double *y, int x_length, double *xi, 94 | int xi_length, double *yi); 95 | 96 | //----------------------------------------------------------------------------- 97 | // randn() generates pseudorandom numbers based on xorshift method. 98 | // Output: 99 | // A generated pseudorandom number 100 | //----------------------------------------------------------------------------- 101 | double randn(void); 102 | 103 | //----------------------------------------------------------------------------- 104 | // fast_fftfilt() carries out the convolution in the frequency domain. 105 | // Input: 106 | // x : Input signal 107 | // x_length : Length of x 108 | // h : Impulse response 109 | // h_length : Length of h 110 | // fft_size : Length of FFT 111 | // forward_real_fft : Struct to speed up the forward FFT 112 | // inverse_real_fft : Struct to speed up the inverse FFT 113 | // Output: 114 | // y : Calculated result. 115 | //----------------------------------------------------------------------------- 116 | void fast_fftfilt(double *x, int x_length, double *h, int h_length, 117 | int fft_size, ForwardRealFFT *forward_real_fft, 118 | InverseRealFFT *inverse_real_fft, double *y); 119 | 120 | //----------------------------------------------------------------------------- 121 | // inv() calculates the inverse matrix of input square matrix. 122 | // Input: 123 | // r : Input square matrix; 124 | // n : Number of dimensions of the input 125 | // Output: 126 | // invr : Calculated inverse matrix. 127 | //----------------------------------------------------------------------------- 128 | void inv(double **r, int n, double **invr); 129 | 130 | //----------------------------------------------------------------------------- 131 | // matlab_std() calculates the standard deviation of the input vector.
132 | // Input: 133 | // x : Input vector 134 | // x_length : Length of x 135 | // Output: 136 | // Calculated standard deviation 137 | //----------------------------------------------------------------------------- 138 | double matlab_std(double *x, int x_length); 139 | 140 | //----------------------------------------------------------------------------- 141 | // wavwrite() writes a .wav file. 142 | // Input: 143 | // x : Input signal 144 | // x_length : Signal length of x [sample] 145 | // fs : Sampling frequency [Hz] 146 | // nbit : Quantization bit [bit] 147 | // filename : Name of the output signal. 148 | // Caution: 149 | // The variable nbit is not used in this function. 150 | // This function only supports the 16 bit. 151 | //----------------------------------------------------------------------------- 152 | void wavwrite(double *x, int x_length, int fs, int nbit, const char *filename); 153 | 154 | //----------------------------------------------------------------------------- 155 | // wavread() reads a .wav file. We do not recommend using this function. 156 | // Input: 157 | // filename : Name of the .wav file to read. 158 | // fs : Sampling frequency [Hz] 159 | // nbit : Quantization bit [bit] 160 | // wav_length : Signal length of the output signal [sample]. 161 | // Output: 162 | // Output .wav file (double *) 163 | //----------------------------------------------------------------------------- 164 | double *wavread(const char *filename, int *fs, int *nbit, int *wav_length); 165 | 166 | #endif // WORLD_MATLABFUNCTIONS_H_ 167 | -------------------------------------------------------------------------------- /core/world/star.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved.
3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Spectral envelope estimation based on STAR (Synchronous Technique and Adroit 6 | // Restoration). 7 | // Please see styleguide.txt to show special rules on names of variables 8 | // and fnctions. 9 | //----------------------------------------------------------------------------- 10 | #include 11 | #include 12 | #include 13 | #include "./star.h" 14 | #include "./matlabfunctions.h" 15 | #include "./constant_numbers.h" 16 | 17 | namespace { 18 | 19 | //----------------------------------------------------------------------------- 20 | // AdroitSmoothing() carries out the spectral smoothing by rectangular window 21 | // whose length is F0. 22 | // This function is only used in StarGeneralBody(). 23 | //----------------------------------------------------------------------------- 24 | void AdroitSmoothing(double current_f0, int fs, int fft_size, 25 | double *power_spectrum, double *star_spectrum) { 26 | int boundary = static_cast(current_f0 / 27 | (static_cast(fs) / fft_size)) + 1; 28 | double *mirroring_spectrum = new double[fft_size + boundary * 2 + 1]; 29 | 30 | for (int i = 0; i < boundary; ++i) 31 | mirroring_spectrum[i] = power_spectrum[boundary - i]; 32 | for (int i = boundary; i < fft_size / 2 + boundary; ++i) 33 | mirroring_spectrum[i] = power_spectrum[i - boundary]; 34 | for (int i = fft_size / 2 + boundary; 35 | i < fft_size / 2 + boundary * 2 + 1; ++i) 36 | mirroring_spectrum[i] = 37 | power_spectrum[fft_size / 2 - (i - (fft_size / 2 + boundary)) - 1]; 38 | 39 | int tmp = static_cast(current_f0 * fft_size / fs); 40 | 41 | double *mirroring_segment = new double[fft_size * 2]; 42 | mirroring_segment[0] = log(mirroring_spectrum[0]) * fs / fft_size; 43 | for (int i = 1; i < fft_size / 2 + boundary * 2 + 1; ++i) 44 | mirroring_segment[i] = log(mirroring_spectrum[i]) * fs / fft_size + 45 | mirroring_segment[i - 1]; 46 | 47 | double *frequency_axis = new double[fft_size / 2 + 1]; 48 | for 
(int i = 0; i <= fft_size / 2; ++i) 49 | frequency_axis[i] = static_cast(i) / fft_size * 50 | fs - current_f0 / 2.0; 51 | 52 | double *low_levels = new double[fft_size / 2 + 1]; 53 | double *high_levels = new double[fft_size / 2 + 1]; 54 | double origin_of_mirroring_axis = 55 | -(static_cast(boundary) - 0.5) * fs / fft_size; 56 | double discrete_frequency_interval = static_cast(fs) / fft_size; 57 | 58 | interp1Q(origin_of_mirroring_axis, discrete_frequency_interval, 59 | mirroring_segment, fft_size / 2 + boundary * 2 + 1, frequency_axis, 60 | fft_size / 2 + 1, low_levels); 61 | for (int i = 0; i <= fft_size / 2; ++i) frequency_axis[i] += current_f0; 62 | 63 | interp1Q(origin_of_mirroring_axis, discrete_frequency_interval, 64 | mirroring_segment, fft_size / 2 + boundary * 2 + 1, frequency_axis, 65 | fft_size / 2 + 1, high_levels); 66 | 67 | for (int i = 0; i <= fft_size / 2; ++i) 68 | star_spectrum[i] = exp((high_levels[i] - low_levels[i]) / current_f0); 69 | 70 | delete[] low_levels; 71 | delete[] high_levels; 72 | delete[] mirroring_segment; 73 | delete[] frequency_axis; 74 | delete[] mirroring_spectrum; 75 | } 76 | 77 | //----------------------------------------------------------------------------- 78 | // GetPowerSpectrum() carries out (1) designing the window, 79 | // (2) windowing the waveform and (3) calculation of the power_spectrum 80 | //----------------------------------------------------------------------------- 81 | void GetPowerSpectrum(double *x, int x_length, int fs, double current_f0, 82 | double temporal_position, ForwardRealFFT *forward_real_fft, 83 | double *power_spectrum) { 84 | int half_window_length = matlab_round(3.0 * fs / current_f0 / 2.0); 85 | int *base_index = new int[half_window_length * 2 + 1]; 86 | int *index = new int[half_window_length * 2 + 1]; 87 | 88 | for (int i = -half_window_length; i <= half_window_length; ++i) 89 | base_index[i + half_window_length] = i; 90 | for (int i = 0; i <= half_window_length * 2; ++i) 91 | index[i] = 
MyMin(x_length, MyMax(1, 92 | matlab_round(temporal_position * fs + 1 + base_index[i]))) - 1; 93 | 94 | // Designing of the window function 95 | double *window = new double[half_window_length * 2 + 1]; 96 | double average = 0.0; 97 | double position; 98 | for (int i = 0; i <= half_window_length * 2; ++i) { 99 | position = static_cast(base_index[i]) / fs / (3.0 / 2.0) + 100 | (temporal_position * fs - matlab_round(temporal_position * fs)) / fs; 101 | window[i] = 0.5 * cos(world::kPi * position * current_f0) + 0.5; 102 | average += window[i] * window[i]; 103 | } 104 | average = sqrt(average); 105 | for (int i = 0; i <= half_window_length * 2; ++i) 106 | window[i] /= average; 107 | 108 | // Windowing and FFT 109 | for (int i = 0; i <= half_window_length * 2; ++i) 110 | forward_real_fft->waveform[i] = x[index[i]] * window[i]; 111 | for (int i = half_window_length * 2 + 1; i < forward_real_fft->fft_size; ++i) 112 | forward_real_fft->waveform[i] = 0.0; 113 | fft_execute(forward_real_fft->forward_fft); 114 | 115 | // Calculation of the power spectrum. 116 | for (int i = 1; i <= forward_real_fft->fft_size / 2; ++i) 117 | power_spectrum[i] = forward_real_fft->spectrum[i][0] * 118 | forward_real_fft->spectrum[i][0] + 119 | forward_real_fft->spectrum[i][1] * 120 | forward_real_fft->spectrum[i][1]; 121 | power_spectrum[0] = power_spectrum[1]; 122 | 123 | delete[] window; 124 | delete[] base_index; 125 | delete[] index; 126 | } 127 | 128 | //----------------------------------------------------------------------------- 129 | // StarGeneralBody() calculates a spectral envelope at a temporal position. 130 | // This function is only used in Star(). 131 | // Caution: 132 | // windowed_waveform, y_spectrum and forward_fft is allocated in advance in 133 | // Star() to speed up the processing. If you want to develop real-time 134 | // application, you should modify this function not to use these arguments 135 | // and edit this function. 
136 | //----------------------------------------------------------------------------- 137 | void StarGeneralBody(double *x, int x_length, int fs, double current_f0, 138 | double temporal_position, ForwardRealFFT *forward_real_fft, 139 | double * star_spectrum) { 140 | double *power_spectrum = new double[forward_real_fft->fft_size]; 141 | 142 | // Synchronous analysis 143 | GetPowerSpectrum(x, x_length, fs, current_f0, temporal_position, 144 | forward_real_fft, power_spectrum); 145 | 146 | // Adroit smoothing 147 | AdroitSmoothing(current_f0, fs, forward_real_fft->fft_size, 148 | power_spectrum, star_spectrum); 149 | 150 | delete[] power_spectrum; 151 | } 152 | 153 | } // namespace 154 | 155 | void Star(double *x, int x_length, int fs, double *time_axis, double *f0, 156 | int f0_length, double **spectrogram) { 157 | double frame_period = (time_axis[1] - time_axis[0]) * 1000.0; 158 | 159 | int fft_size = GetFFTSizeForStar(fs); 160 | 161 | double *star_spectrum = new double[fft_size]; 162 | 163 | // Following three variables are shared in StarGeneralBody() 164 | ForwardRealFFT forward_real_fft = {0}; 165 | InitializeForwardRealFFT(fft_size, &forward_real_fft); 166 | 167 | double current_f0; 168 | for (int i = 0; i < f0_length; ++i) { 169 | current_f0 = f0[i] <= world::kFloorF0 ? 
world::kDefaultF0 : f0[i]; 170 | StarGeneralBody(x, x_length, fs, current_f0, time_axis[i], 171 | &forward_real_fft, star_spectrum); 172 | for (int j = 0; j <= fft_size / 2; ++j) 173 | spectrogram[i][j] = star_spectrum[j]; 174 | } 175 | 176 | DestroyForwardRealFFT(&forward_real_fft); 177 | delete[] star_spectrum; 178 | } 179 | 180 | int GetFFTSizeForStar(int fs) { 181 | return static_cast(pow(2.0, 1.0 + 182 | static_cast(log(3.0 * fs / world::kFloorF0 + 1) / world::kLog2))); 183 | } 184 | -------------------------------------------------------------------------------- /core/world/synthesis_ap.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Voice synthesis based on f0, spectrogram and aperiodicity. 6 | // forward_real_fft, inverse_real_fft and minimum_phase are used to speed up. 7 | //----------------------------------------------------------------------------- 8 | #include 9 | #include 10 | #include 11 | #include "./synthesis_ap.h" 12 | #include "./common.h" 13 | #include "./tandem_ap.h" 14 | #include "./matlabfunctions.h" 15 | #include "./constant_numbers.h" 16 | 17 | namespace { 18 | 19 | //----------------------------------------------------------------------------- 20 | // GetGlottalPulse() calculates the glottal pulse based on periodic response 21 | // and aperiodic response. 
22 | //----------------------------------------------------------------------------- 23 | void GetGlottalPulse(double f0, int fft_size, double *periodic_response, 24 | double *aperiodic_response, int noise_size, double *y) { 25 | if (f0 != 0) { 26 | for (int i = 0; i < fft_size; ++i) 27 | y[i] = periodic_response[i] * sqrt(static_cast(noise_size)) + 28 | aperiodic_response[i]; 29 | } else { 30 | for (int i = 0; i < fft_size; ++i) 31 | y[i] = aperiodic_response[i]; 32 | } 33 | for (int i = 0; i < fft_size; ++i) y[i] /= fft_size; 34 | } 35 | 36 | //----------------------------------------------------------------------------- 37 | // CalculateAperiodicity() transforms the input aperiodicity in each band 38 | // into the aperiodicity spectrum whose length is fft_size. 39 | //----------------------------------------------------------------------------- 40 | void CalculateAperiodicity(double *aperiodicity, int number_of_bands, 41 | int fft_size, double f0, int fs, double target_f0, double *periodic_spec) { 42 | if (f0 == 0) { 43 | for (int i = 0; i <= fft_size / 2; ++i) periodic_spec[i] = 0.0; 44 | return; 45 | } 46 | double *ap = new double[number_of_bands + 1]; 47 | double *axis = new double[number_of_bands + 1]; 48 | double *w = new double[fft_size / 2 + 1]; 49 | double *tmp_ap = new double[fft_size / 2 + 1]; 50 | 51 | double *cutoff_list = new double[number_of_bands]; 52 | for (int i = 0; i < number_of_bands; ++i) 53 | cutoff_list[i] = fs / pow(2.0, i + 2.0); 54 | 55 | const double kMySafeGuardLogMinimum = -27.631021115928547; 56 | ap[0] = kMySafeGuardLogMinimum; 57 | axis[0] = 0.0; 58 | for (int i = 0; i < number_of_bands - 1; ++i) { 59 | ap[i + 1] = log(aperiodicity[i]); 60 | axis[i + 1] = cutoff_list[number_of_bands - i - 2]; 61 | } 62 | ap[number_of_bands] = log(aperiodicity[number_of_bands - 1]); 63 | axis[number_of_bands] = fs / 2.0; 64 | 65 | double stretching_factor = MyMax(f0, target_f0) / target_f0; 66 | for (int i = 0; i <= fft_size / 2; ++i) 67 | w[i] = 
static_cast(i * fs) / fft_size; 68 | interp1(axis, ap, number_of_bands + 1, w, fft_size / 2 + 1, tmp_ap); 69 | for (int i = 0; i < number_of_bands - 1; ++i) 70 | axis[i + 1] *= stretching_factor; 71 | axis[number_of_bands] = fs / 2.0 * stretching_factor; 72 | 73 | interp1(axis, ap, number_of_bands + 1, w, fft_size / 2 + 1, periodic_spec); 74 | 75 | for (int i = 0; i <= fft_size / 2; ++i) 76 | periodic_spec[i] = 1.0 - 77 | MyMin(exp(tmp_ap[i] * 2.0), exp(periodic_spec[i] * 2.0)); 78 | 79 | delete[] tmp_ap; 80 | delete[] cutoff_list; 81 | delete[] w; 82 | delete[] axis; 83 | delete[] ap; 84 | } 85 | 86 | //----------------------------------------------------------------------------- 87 | // GetAperiodicResponse() calculates an aperiodic response. 88 | //----------------------------------------------------------------------------- 89 | void GetAperiodicResponse(int noise_size, int fft_size, 90 | double *spectrum, double *aperiodic_ratio, 91 | ForwardRealFFT *forward_real_fft, InverseRealFFT *inverse_real_fft, 92 | MinimumPhaseAnalysis *minimum_phase, 93 | double *aperiodic_response) { 94 | for (int i = 0; i < noise_size; ++i) forward_real_fft->waveform[i] = randn(); 95 | for (int i = noise_size; i < fft_size; ++i) 96 | forward_real_fft->waveform[i] = 0.0; 97 | fft_execute(forward_real_fft->forward_fft); 98 | 99 | for (int i = 0; i <= minimum_phase->fft_size / 2; ++i) 100 | minimum_phase->log_spectrum[i] = 101 | log(spectrum[i] * 102 | ((1 - aperiodic_ratio[i]) + world::kMySafeGuardMinimum)) / 2.0; 103 | GetMinimumPhaseSpectrum(minimum_phase); 104 | 105 | for (int i = 0; i <= fft_size / 2; ++i) { 106 | inverse_real_fft->spectrum[i][0] = 107 | minimum_phase->minimum_phase_spectrum[i][0] * 108 | forward_real_fft->spectrum[i][0] - 109 | minimum_phase->minimum_phase_spectrum[i][1] * 110 | forward_real_fft->spectrum[i][1]; 111 | inverse_real_fft->spectrum[i][1] = 112 | minimum_phase->minimum_phase_spectrum[i][0] * 113 | forward_real_fft->spectrum[i][1] + 114 | 
minimum_phase->minimum_phase_spectrum[i][1] * 115 | forward_real_fft->spectrum[i][0]; 116 | } 117 | fft_execute(inverse_real_fft->inverse_fft); 118 | for (int i = 0; i < fft_size; ++i) 119 | aperiodic_response[i] = inverse_real_fft->waveform[i]; 120 | } 121 | 122 | //----------------------------------------------------------------------------- 123 | // GetPeriodicResponse() calculates an aperiodic response. 124 | //----------------------------------------------------------------------------- 125 | void GetPeriodicResponse(int fft_size, double *spectrum, 126 | double *aperiodic_ratio, InverseRealFFT *inverse_real_fft, 127 | MinimumPhaseAnalysis *minimum_phase, 128 | double *periodic_response) { 129 | for (int i = 0; i <= minimum_phase->fft_size / 2; ++i) 130 | minimum_phase->log_spectrum[i] = 131 | log(spectrum[i] * aperiodic_ratio[i]) / 2.0; 132 | GetMinimumPhaseSpectrum(minimum_phase); 133 | for (int i = 0; i <= fft_size / 2; ++i) { 134 | inverse_real_fft->spectrum[i][0] = 135 | minimum_phase->minimum_phase_spectrum[i][0]; 136 | inverse_real_fft->spectrum[i][1] = 137 | minimum_phase->minimum_phase_spectrum[i][1]; 138 | } 139 | fft_execute(inverse_real_fft->inverse_fft); 140 | for (int i = 0; i < fft_size; ++i) 141 | periodic_response[i] = inverse_real_fft->waveform[i]; 142 | } 143 | 144 | //----------------------------------------------------------------------------- 145 | // GetOneFrameSegment() calculates a glottal pulse at a time. 
146 | //----------------------------------------------------------------------------- 147 | void GetOneFrameSegment(double *f0, double **spectrogram, int fft_size, 148 | double **aperiodicity, int number_of_bands, double target_f0, 149 | double frame_period, double current_time, int fs, double default_f0, 150 | ForwardRealFFT *forward_real_fft, InverseRealFFT *inverse_real_fft, 151 | MinimumPhaseAnalysis *minimum_phase, double *y) { 152 | double *aperiodic_ratio = new double[fft_size]; 153 | double *aperiodic_response = new double[fft_size]; 154 | double *periodic_response = new double[fft_size]; 155 | 156 | int current_frame = matlab_round(current_time / (frame_period / 1000.0)); 157 | int noise_size = static_cast((current_time + 1.0 / 158 | (f0[current_frame] == 0.0 ? default_f0 : f0[current_frame])) * fs) - 159 | static_cast(current_time * fs); 160 | 161 | // Calculation of the aperiodicity at each discrete frequency 162 | CalculateAperiodicity(aperiodicity[current_frame], number_of_bands, 163 | fft_size, f0[current_frame], fs, target_f0, aperiodic_ratio); 164 | 165 | // Synthesis of the aperiodic response 166 | GetAperiodicResponse(noise_size, fft_size, spectrogram[current_frame], 167 | aperiodic_ratio, forward_real_fft, inverse_real_fft, minimum_phase, 168 | aperiodic_response); 169 | 170 | // Synthesis of the periodic response. 171 | // If f0 is zero, we cannot synthesize it. 
172 | if (f0[current_frame] != 0) { 173 | GetPeriodicResponse(fft_size, spectrogram[current_frame], 174 | aperiodic_ratio, inverse_real_fft, minimum_phase, 175 | periodic_response); 176 | } 177 | 178 | GetGlottalPulse(f0[current_frame], fft_size, periodic_response, 179 | aperiodic_response, noise_size, y); 180 | 181 | delete[] periodic_response; 182 | delete[] aperiodic_response; 183 | delete[] aperiodic_ratio; 184 | } 185 | 186 | } // namespace 187 | 188 | void SynthesisFromAperiodicity(double *f0, int f0_length, double **spectrogram, 189 | int fft_size, double **aperiodicity, int number_of_bands, double target_f0, 190 | double frame_period, int fs, int y_length, double *y) { 191 | double *impulse_response = new double[fft_size]; 192 | 193 | for (int i = 0; i < y_length; ++i) y[i] = 0.0; 194 | 195 | MinimumPhaseAnalysis minimum_phase = {0}; 196 | InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase); 197 | InverseRealFFT inverse_real_fft = {0}; 198 | InitializeInverseRealFFT(fft_size, &inverse_real_fft); 199 | ForwardRealFFT forward_real_fft = {0}; 200 | InitializeForwardRealFFT(fft_size, &forward_real_fft); 201 | 202 | double current_time = 0.0; 203 | int current_position = 0; 204 | int current_frame = 0; 205 | while (1) { 206 | GetOneFrameSegment(f0, spectrogram, fft_size, aperiodicity, 207 | number_of_bands, target_f0, frame_period, current_time, fs, 208 | world::kDefaultF0, &forward_real_fft, &inverse_real_fft, 209 | &minimum_phase, impulse_response); 210 | 211 | for (int i = current_position; 212 | i < MyMin(current_position + fft_size / 2, y_length - 1); ++i) { 213 | y[i] += impulse_response[i - current_position]; 214 | } 215 | 216 | current_time += 1.0 / (f0[current_frame] == 217 | 0.0 ? 
world::kDefaultF0 : f0[current_frame]); 218 | current_frame = matlab_round(current_time / (frame_period / 1000.0)); 219 | current_position = static_cast(current_time * fs); 220 | if (current_frame >= f0_length) break; 221 | } 222 | 223 | DestroyMinimumPhaseAnalysis(&minimum_phase); 224 | DestroyInverseRealFFT(&inverse_real_fft); 225 | DestroyForwardRealFFT(&forward_real_fft); 226 | 227 | delete[] impulse_response; 228 | } 229 | -------------------------------------------------------------------------------- /core/world/platinum.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Excitation signal extraction by PLATINUM. 6 | // Ths excitation signal is calculated as the signal that is the convolution of 7 | // the spectrum of windowed signal and inverse function of spectral envelop. 8 | // Please see styleguide.txt to show special rules on names of variables 9 | // and fnctions. 10 | //----------------------------------------------------------------------------- 11 | #include 12 | #include 13 | #include 14 | #include "./platinum.h" 15 | #include "./common.h" 16 | #include "./matlabfunctions.h" 17 | #include "./constant_numbers.h" 18 | 19 | namespace { 20 | 21 | //----------------------------------------------------------------------------- 22 | // GetNearestPulseIndex() calculates the nearest index of pulse_locations. 23 | // Platinum() calculates the residual spectrum in all frame (frame_period 24 | // interval), while the pulse_locations are not calculated in all frame. 25 | // This function is only used GetOneFrameResidualSpec(). 
26 | //----------------------------------------------------------------------------- 27 | int GetNearestPulseIndex(int pulse_count, double current_time, 28 | int fs, double *pulse_locations) { 29 | const double kSafeGuradForAmplitude = 100000.0; 30 | double minimum_value = kSafeGuradForAmplitude; // safe guard 31 | 32 | int index, minimum_index; 33 | double tmp; 34 | for (int i = 0; i < pulse_count; ++i) { 35 | tmp = fabs(pulse_locations[i] - current_time); 36 | if (tmp < minimum_value) { 37 | minimum_value = tmp; 38 | minimum_index = i; 39 | } 40 | index = 1 + matlab_round(pulse_locations[minimum_index] * fs); 41 | } 42 | 43 | return index; 44 | } 45 | 46 | //----------------------------------------------------------------------------- 47 | // GetOneFrameResidualSpec() calculates the residual spectrum. 48 | // Residual spectrum is calculated by convoluting the spectrum of widnowed 49 | // waveform and the inverse function of minimum phase spectrum. 50 | //----------------------------------------------------------------------------- 51 | void GetOneFrameResidualSpec(double *x, int x_length, int fs, 52 | double current_time, double current_t0, ForwardRealFFT *forward_real_fft, 53 | MinimumPhaseAnalysis *minimum_phase, double *pulse_locations, 54 | int pulse_count, double *residual_spectrum) { 55 | GetMinimumPhaseSpectrum(minimum_phase); 56 | 57 | int index = GetNearestPulseIndex(pulse_count, 58 | current_time, fs, pulse_locations); 59 | 60 | int window_length = matlab_round(current_t0 * 2.0); 61 | if (window_length + index - matlab_round(current_t0) >= x_length) { 62 | for (int i = 0; i < minimum_phase->fft_size; ++i) 63 | residual_spectrum[i] = randn() * world::kMySafeGuardMinimum; 64 | return; 65 | } 66 | 67 | // Windowing and FFT 68 | for (int i = 0; i < window_length; ++i) 69 | forward_real_fft->waveform[i] = 70 | x[MyMin(x_length - 1, MyMax(0, i + index - matlab_round(current_t0)))] * 71 | (0.5 - 0.5 * cos(2.0 * world::kPi * (i + 1.0) / 72 | (window_length + 
1.0))); 73 | for (int i = window_length; i < minimum_phase->fft_size; ++i) 74 | forward_real_fft->waveform[i] = 0.0; 75 | fft_execute(forward_real_fft->forward_fft); 76 | 77 | // Convolution 78 | residual_spectrum[0] = forward_real_fft->spectrum[0][0] / 79 | minimum_phase->minimum_phase_spectrum[0][0]; 80 | double tmp; 81 | for (int i = 0; i < minimum_phase->fft_size / 2 - 1; ++i) { 82 | tmp = minimum_phase->minimum_phase_spectrum[i + 1][0] * 83 | minimum_phase->minimum_phase_spectrum[i + 1][0] + 84 | minimum_phase->minimum_phase_spectrum[i + 1][1] * 85 | minimum_phase->minimum_phase_spectrum[i + 1][1]; 86 | residual_spectrum[i * 2 + 1] = 87 | (minimum_phase->minimum_phase_spectrum[i + 1][0] * 88 | forward_real_fft->spectrum[i + 1][0] + 89 | minimum_phase->minimum_phase_spectrum[i + 1][1] * 90 | forward_real_fft->spectrum[i + 1][1]) / tmp; 91 | residual_spectrum[i * 2 + 2] = 92 | (-minimum_phase->minimum_phase_spectrum[i + 1][1] * 93 | forward_real_fft->spectrum[i + 1][0] + 94 | minimum_phase->minimum_phase_spectrum[i + 1][0] * 95 | forward_real_fft->spectrum[i + 1][1]) / tmp; 96 | } 97 | residual_spectrum[minimum_phase->fft_size - 1] = 98 | forward_real_fft->spectrum[minimum_phase->fft_size / 2][0] / 99 | minimum_phase->minimum_phase_spectrum[minimum_phase->fft_size / 2][0]; 100 | } 101 | 102 | 103 | //----------------------------------------------------------------------------- 104 | // GetWedgeInOneSection() is calculates a wedge in one voiced/unvoiced section. 105 | // This function is only used in GetWedgeList(). 106 | //----------------------------------------------------------------------------- 107 | int GetWedgeInOneSection(double *x, int x_length, int fs, double *f0, 108 | double frame_period, int start_index, int end_index) { 109 | int center_time = (start_index + end_index + 1) / 2; 110 | int t0 = matlab_round((fs / (f0[center_time] == 111 | 0.0 ? 
world::kDefaultF0 : f0[center_time]))); 112 | int center_index = 113 | matlab_round((1 + center_time) * frame_period * fs / 1000.0); 114 | 115 | int wedge = 0; 116 | double peak_value = 0.0; 117 | double tmp_amplitude; 118 | int tmp_index; 119 | for (int j = 0; j < t0 * 2 + 1; ++j) { 120 | tmp_index = MyMax(0, MyMin(x_length - 1, center_index - t0 + j)); 121 | tmp_amplitude = 122 | fabs(x[tmp_index]); 123 | if (tmp_amplitude > peak_value) { 124 | peak_value = tmp_amplitude; 125 | wedge = tmp_index; 126 | } 127 | } 128 | return wedge; 129 | } 130 | 131 | //----------------------------------------------------------------------------- 132 | // GetWedgeList() calculates the suitable peak amplitude of each voiced 133 | // section. Peak amplitudes are used as "Wedge" to calculate the temporal 134 | // positions used for windowing. 135 | // This function is only used in Platinum(). 136 | //----------------------------------------------------------------------------- 137 | void GetWedgeList(double *x, int x_length, int number_of_voiced_sections, 138 | int *start_list, int *end_list, int fs, double frame_period, double *f0, 139 | int *wedge_list) { 140 | for (int i = 0; i < number_of_voiced_sections; ++i) { 141 | wedge_list[i] = GetWedgeInOneSection(x, x_length, fs, 142 | f0, frame_period, start_list[i], end_list[i]); 143 | } 144 | } 145 | 146 | //----------------------------------------------------------------------------- 147 | // GetTemporalBoundaries() calculates the temporal boundaries in VUV. 
// This function is only used in platinum()
// start_list[k] / end_list[k] receive the first / last frame index of the
// k-th run of consecutive frames that are all voiced (f0 != 0) or all
// unvoiced (f0 == 0). Both lists must hold number_of_voiced_sections entries;
// nothing is written when that count is not positive, and no more than that
// many sections are ever written.
//-----------------------------------------------------------------------------
void GetTemporalBoundaries(double *f0, int f0_length,
    int number_of_voiced_sections, int *start_list, int *end_list) {
  if (number_of_voiced_sections <= 0) return;

  start_list[0] = 0;
  end_list[number_of_voiced_sections - 1] = f0_length - 1;

  // Every voiced <-> unvoiced change closes one section and opens the next.
  int section = 0;
  for (int i = 1; i < f0_length; ++i) {
    const bool voiced_now = f0[i] != 0.0;
    const bool voiced_before = f0[i - 1] != 0.0;
    if (voiced_now == voiced_before) continue;
    if (section + 1 >= number_of_voiced_sections) break;  // lists are full
    end_list[section] = i - 1;
    start_list[section + 1] = i;
    ++section;
  }
}

//-----------------------------------------------------------------------------
// GetNumberOfVoicedSections() calculates the number of voiced sections.
// Despite the name, the value counts every run of frames, voiced or unvoiced:
// it is the number of voiced <-> unvoiced changes plus one, which is exactly
// how many entries GetTemporalBoundaries() fills. Returns 0 for an empty
// contour.
// This function is only used in platinum()
//-----------------------------------------------------------------------------
int GetNumberOfVoicedSections(double *f0, int f0_length) {
  if (f0_length <= 0) return 0;

  int number_of_voiced_sections = 1;
  for (int i = 1; i < f0_length; ++i)
    if ((f0[i] != 0.0) != (f0[i - 1] != 0.0)) number_of_voiced_sections++;

  return number_of_voiced_sections;
}

//-----------------------------------------------------------------------------
// GetPulseLocationsInOneSection() calculates the peak locations in one frame.
// This function is only used in GetPulseLocations().
188 | //----------------------------------------------------------------------------- 189 | int GetPulseLocationsInOneSection(int fs, int x_length, int start_index, 190 | int end_index, double frame_period, int current_wedge, 191 | double *total_phase, int current_count, double *pulse_locations) { 192 | start_index = 193 | MyMax(0, static_cast(fs * start_index * frame_period / 1000.0)); 194 | end_index = MyMin(x_length - 1, 195 | matlab_round(fs * (end_index + 1.0) * frame_period / 1000.0) -1); 196 | double tmp = total_phase[current_wedge]; 197 | 198 | for (int i = start_index; i < end_index; ++i) 199 | if (fabs(fmod(total_phase[i + 1] - tmp, 2.0 * world::kPi) - 200 | fmod(total_phase[i] - tmp, 2.0 * world::kPi)) > world::kPi / 2.0) 201 | pulse_locations[current_count++] = static_cast(i) / fs; 202 | return current_count; 203 | } 204 | 205 | //----------------------------------------------------------------------------- 206 | // GetTotalPhase() calculates the phase response from f0 contour. 207 | // Sampling period of f0 time_axis[1] - time_axis[0], while the sampling 208 | // period of total_phase is 1 / fs. total_phase is used to calculate the 209 | // temporal positions of glottal pulse. 210 | // This function is only used in GetPulseLocations(). 211 | //----------------------------------------------------------------------------- 212 | void GetTotalPhase(double *f0, int f0_length, int x_length, double *time_axis, 213 | int fs, double *total_phase) { 214 | double *fixed_f0 = new double[f0_length]; 215 | double *time_axis_of_x = new double[x_length]; 216 | double *interpolated_f0 = new double[x_length]; 217 | 218 | for (int i = 0; i < f0_length; ++i) 219 | fixed_f0[i] = f0[i] == 0 ? 
world::kDefaultF0 : f0[i]; 220 | for (int i = 0; i < x_length; ++i) 221 | time_axis_of_x[i] = static_cast(i) / fs; 222 | 223 | interp1(time_axis, fixed_f0, f0_length, time_axis_of_x, 224 | x_length, interpolated_f0); 225 | total_phase[0] = interpolated_f0[0] * 2.0 * world::kPi / fs; 226 | for (int i = 1; i < x_length; ++i) 227 | total_phase[i] = total_phase[i - 1] + 228 | interpolated_f0[i] * 2.0 * world::kPi / fs; 229 | 230 | delete[] fixed_f0; 231 | delete[] interpolated_f0; 232 | delete[] time_axis_of_x; 233 | } 234 | 235 | //----------------------------------------------------------------------------- 236 | // GetPulseLocations() calculates the temporal positions (maximum peak index) 237 | // for windowing. These positions are calculated in each frame. 238 | // Pulse means "glottal pulse" 239 | // This function is only used in Platinum(). 240 | //----------------------------------------------------------------------------- 241 | int GetPulseLocations(double *x, int x_length, int fs, double *f0, 242 | int f0_length, double *time_axis, double frame_period, 243 | double *pulse_locations) { 244 | int number_of_voiced_sections = GetNumberOfVoicedSections(f0, f0_length); 245 | 246 | int *start_list = new int[number_of_voiced_sections]; 247 | int *end_list = new int[number_of_voiced_sections]; 248 | GetTemporalBoundaries(f0, f0_length, number_of_voiced_sections, 249 | start_list, end_list); 250 | 251 | int *wedge_list = new int[number_of_voiced_sections]; 252 | GetWedgeList(x, x_length, number_of_voiced_sections, start_list, end_list, 253 | fs, frame_period, f0, wedge_list); 254 | 255 | double *total_phase = new double[x_length]; 256 | GetTotalPhase(f0, f0_length, x_length, time_axis, fs, total_phase); 257 | 258 | int pulse_count = 0; 259 | for (int i = 0; i < number_of_voiced_sections; ++i) { 260 | pulse_count = GetPulseLocationsInOneSection(fs, x_length, start_list[i], 261 | end_list[i], frame_period, wedge_list[i], total_phase, pulse_count, 262 | pulse_locations); 
263 | } 264 | 265 | delete[] total_phase; 266 | delete[] wedge_list; 267 | delete[] end_list; 268 | delete[] start_list; 269 | return pulse_count; 270 | } 271 | 272 | } // namespace 273 | 274 | void Platinum(double *x, int x_length, int fs, double *time_axis, double *f0, 275 | int f0_length, double **spectrogram, int fft_size, 276 | double **residual_spectrogram) { 277 | double frame_period = (time_axis[1] - time_axis[0]) * 1000.0; 278 | 279 | double *pulse_locations = new double[x_length]; 280 | int pulse_count = GetPulseLocations(x, x_length, fs, f0, f0_length, 281 | time_axis, frame_period, pulse_locations); 282 | 283 | double *residual_spectrum = new double[fft_size]; 284 | for (int i = 0; i < fft_size; ++i) 285 | residual_spectrogram[0][i] = world::kMySafeGuardMinimum; 286 | 287 | // For minimum phase spectrum 288 | MinimumPhaseAnalysis minimum_phase = {0}; 289 | InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase); 290 | // For forward real FFT 291 | ForwardRealFFT forward_real_fft = {0}; 292 | InitializeForwardRealFFT(fft_size, &forward_real_fft); 293 | 294 | double current_f0; 295 | for (int i = 1; i < f0_length; ++i) { 296 | current_f0 = f0[i] <= world::kFloorF0 ? 
world::kDefaultF0 : f0[i]; 297 | for (int j = 0; j <= fft_size / 2; ++j) 298 | minimum_phase.log_spectrum[j] = log(spectrogram[i][j]) / 2.0; 299 | 300 | GetOneFrameResidualSpec(x, x_length, fs, 301 | i * frame_period / 1000.0, fs / current_f0, 302 | &forward_real_fft, &minimum_phase, pulse_locations, 303 | pulse_count, residual_spectrogram[i]); 304 | } 305 | DestroyMinimumPhaseAnalysis(&minimum_phase); 306 | DestroyForwardRealFFT(&forward_real_fft); 307 | 308 | delete[] residual_spectrum; 309 | delete[] pulse_locations; 310 | } 311 | -------------------------------------------------------------------------------- /core/world/matlabfunctions.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Matlab functions implemented for WORLD 6 | // Since these functions are implemented as the same function of Matlab, 7 | // the source code does not follow the style guide (Names of variables 8 | // and functions). 9 | // Please see the reference of Matlab to show the usage of functions. 10 | // Caution: 11 | // Since these functions (wavread() and wavwrite())are roughly implemented, 12 | // we recomend more suitable functions provided by other organizations. 13 | //----------------------------------------------------------------------------- 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "./matlabfunctions.h" 20 | #include "./constant_numbers.h" 21 | 22 | #pragma warning(disable : 4996) 23 | 24 | namespace { 25 | //----------------------------------------------------------------------------- 26 | // FilterForDecimate() calculates the coefficients of low-pass filter and 27 | // carries out the filtering. This function is only used for decimate(). 
//-----------------------------------------------------------------------------
// The coefficients are selected by the decimation ratio r (2 .. 12). Any
// other r selects all-zero coefficients, so every output sample becomes 0.
// y must have room for x_length samples.
void FilterForDecimate(double *x, int x_length, int r, double *y) {
  // One row per supported ratio: feedback terms a[0..2], then the two
  // distinct feed-forward terms b[0..1].
  struct Coefficients {
    int ratio;
    double a[3];
    double b[2];
  };
  static const Coefficients kCoefficients[] = {
    {11,  // fs : 44100 (default)
     {2.450743295230728, -2.06794904601978, 0.59574774438332101},
     {0.0026822508007163792, 0.0080467524021491377}},
    {12,  // fs : 48000
     {2.4981398605924205, -2.1368928194784025, 0.62187513816221485},
     {0.0021097275904709001, 0.0063291827714127002}},
    {10,
     {2.3936475118069387, -1.9873904075111861, 0.5658879979027055},
     {0.0034818622251927556, 0.010445586675578267}},
    {9,
     {2.3236003491759578, -1.8921545617463598, 0.53148928133729068},
     {0.0046331164041389372, 0.013899349212416812}},
    {8,  // fs : 32000
     {2.2357462340187593, -1.7780899984041358, 0.49152555365968692},
     {0.0063522763407111993, 0.019056829022133598}},
    {7,
     {2.1225239019534703, -1.6395144861046302, 0.44469707800587366},
     {0.0090366882681608418, 0.027110064804482525}},
    {6,  // fs : 24000 and 22050
     {1.9715352749512141, -1.4686795689225347, 0.3893908434965701},
     {0.013469181309343825, 0.040407543928031475}},
    {5,
     {1.7610939654280557, -1.2554914843859768, 0.3237186507788215},
     {0.021334858522387423, 0.06400457556716227}},
    {4,  // fs : 16000
     {1.4499664446880227, -0.98943497080950582, 0.24578252340690215},
     {0.036710750339322612, 0.11013225101796784}},
    {3,
     {0.95039378983237421, -0.67429146741526791, 0.15412211621346475},
     {0.071221945171178636, 0.21366583551353591}},
    {2,  // fs : 8000
     {0.041156734567757189, -0.42599112459189636, 0.041037215479961225},
     {0.16797464681802227, 0.50392394045406674}},
  };
  const int kNumberOfRows = sizeof(kCoefficients) / sizeof(kCoefficients[0]);

  // Unsupported ratios keep these zeros.
  double a[3] = {0.0, 0.0, 0.0};
  double b[2] = {0.0, 0.0};
  for (int row = 0; row < kNumberOfRows; ++row) {
    if (kCoefficients[row].ratio != r) continue;
    for (int j = 0; j < 3; ++j) a[j] = kCoefficients[row].a[j];
    for (int j = 0; j < 2; ++j) b[j] = kCoefficients[row].b[j];
    break;
  }

  // Filtering on time domain.
  // state[0..2] hold the three most recent intermediate values, newest first.
  double state[3] = {0.0, 0.0, 0.0};
  for (int i = 0; i < x_length; ++i) {
    const double newest =
        x[i] + a[0] * state[0] + a[1] * state[1] + a[2] * state[2];
    y[i] = b[0] * newest + b[1] * state[0] + b[1] * state[1] +
        b[0] * state[2];
    state[2] = state[1];
    state[1] = state[0];
    state[0] = newest;
  }
}

//-----------------------------------------------------------------------------
// CheckHeader() checks the .wav header. This function can only support the
// monaural wave file. This function is only used in waveread().
//-----------------------------------------------------------------------------
// Fields are read in this order: "RIFF", 4 skipped bytes, "WAVE", "fmt ",
// the bytes 16 0 0 0, the bytes 1 0 (format ID) and the bytes 1 0 (channel
// count). On the first mismatch or short read the matching message is
// printed and false is returned. On success the stream is positioned just
// after the channel count.
// Every read is length-checked: without that, a file that ends right after
// the format ID would pass the channel test on the stale format ID bytes.
bool CheckHeader(FILE *fp) {
  char data_check[5] = {0};  // 4 bytes plus the terminator strcmp() needs

  if (fread(data_check, 1, 4, fp) != 4 ||
      0 != strcmp(data_check, "RIFF")) {  // "RIFF"
    printf("RIFF error.\n");
    return false;
  }
  fseek(fp, 4, SEEK_CUR);
  if (fread(data_check, 1, 4, fp) != 4 ||
      0 != strcmp(data_check, "WAVE")) {  // "WAVE"
    printf("WAVE error.\n");
    return false;
  }
  if (fread(data_check, 1, 4, fp) != 4 ||
      0 != strcmp(data_check, "fmt ")) {  // "fmt "
    printf("fmt error.\n");
    return false;
  }
  if (fread(data_check, 1, 4, fp) != 4 ||
      !(16 == data_check[0] && 0 == data_check[1] &&
      0 == data_check[2] && 0 == data_check[3])) {  // 16 0 0 0
    printf("fmt (2) error.\n");
    return false;
  }
  if (fread(data_check, 1, 2, fp) != 2 ||
      !(1 == data_check[0] && 0 == data_check[1])) {  // 1 0
    printf("Format ID error.\n");
    return false;
  }
  if (fread(data_check, 1, 2, fp) != 2 ||
      !(1 == data_check[0] && 0 == data_check[1])) {  // 1 0
    printf("This function cannot support stereo file\n");
    return false;
  }
  return true;
}

//-----------------------------------------------------------------------------
// GetParameters() extracts fs, nbit, wav_length from the .wav file
// This function is only used in wavread().
174 | //----------------------------------------------------------------------------- 175 | bool GetParameters(FILE *fp, int *fs, int *nbit, int *wav_length) { 176 | char data_check[5] = {0}; 177 | data_check[4] = '\0'; 178 | unsigned char for_int_number[4]; 179 | fread(for_int_number, 1, 4, fp); 180 | *fs = 0; 181 | for (int i = 3; i >= 0; --i) *fs = *fs * 256 + for_int_number[i]; 182 | // Quantization 183 | fseek(fp, 6, SEEK_CUR); 184 | fread(for_int_number, 1, 2, fp); 185 | *nbit = for_int_number[0]; 186 | 187 | // Skip until "data" is found. 2011/3/28 188 | while (0 != fread(data_check, 1, 1, fp)) { 189 | if (data_check[0] == 'd') { 190 | fread(&data_check[1], 1, 3, fp); 191 | if (0 != strcmp(data_check, "data")) { 192 | fseek(fp, -3, SEEK_CUR); 193 | } else { 194 | break; 195 | } 196 | } 197 | } 198 | if (0 != strcmp(data_check, "data")) { 199 | printf("data error.\n"); 200 | return false; 201 | } 202 | 203 | fread(for_int_number, 1, 4, fp); // "data" 204 | *wav_length = 0; 205 | for (int i = 3; i >= 0; --i) 206 | *wav_length = *wav_length * 256 + for_int_number[i]; 207 | *wav_length /= (*nbit / 8); 208 | return true; 209 | } 210 | 211 | } // namespace 212 | 213 | void histc(double *x, int x_length, double *edges, int edges_length, 214 | int *index) { 215 | int count = 1; 216 | 217 | int i = 0; 218 | for (; i < edges_length; ++i) { 219 | index[i] = 1; 220 | if (edges[i] >= x[0]) break; 221 | } 222 | for (; i < edges_length; ++i) { 223 | if (edges[i] < x[count]) { 224 | index[i] = count; 225 | } else { 226 | index[i--] = count++; 227 | } 228 | if (count == x_length) break; 229 | } 230 | count--; 231 | for (i++; i < edges_length; ++i) index[i] = count; 232 | } 233 | 234 | void interp1(double *x, double *y, int x_length, double *xi, int xi_length, 235 | double *yi) { 236 | double *h = new double[x_length - 1]; 237 | double *p = new double[xi_length]; 238 | double *s = new double[xi_length]; 239 | int *k = new int[xi_length]; 240 | 241 | for (int i = 0; i < 
x_length - 1; ++i) h[i] = x[i + 1] - x[i]; 242 | for (int i = 0; i < xi_length; ++i) { 243 | p[i] = i; 244 | k[i] = 0; 245 | } 246 | 247 | histc(x, x_length, xi, xi_length, k); 248 | 249 | for (int i = 0; i < xi_length; ++i) 250 | s[i] = (xi[i] - x[k[i] - 1]) / h[k[i] - 1]; 251 | 252 | for (int i = 0; i < xi_length; ++i) 253 | yi[i] = y[k[i] - 1] + s[i] * (y[k[i]] - y[k[i] - 1]); 254 | 255 | delete[] k; 256 | delete[] s; 257 | delete[] p; 258 | delete[] h; 259 | } 260 | 261 | void decimate(double *x, int x_length, int r, double *y) { 262 | const int kNFact = 9; 263 | double *tmp1 = new double[x_length + kNFact * 2]; 264 | double *tmp2 = new double[x_length + kNFact * 2]; 265 | 266 | for (int i = 0; i < kNFact; ++i) tmp1[i] = 2 * x[0] - x[kNFact - i]; 267 | for (int i = kNFact; i < kNFact + x_length; ++i) tmp1[i] = x[i - kNFact]; 268 | for (int i = kNFact + x_length; i < 2 * kNFact + x_length; ++i) 269 | tmp1[i] = 2 * x[x_length - 1] - x[x_length - 2 - (i - (kNFact + x_length))]; 270 | 271 | FilterForDecimate(tmp1, 2 * kNFact + x_length, r, tmp2); 272 | for (int i = 0; i < 2 * kNFact + x_length; ++i) 273 | tmp1[i] = tmp2[2 * kNFact + x_length - i - 1]; 274 | FilterForDecimate(tmp1, 2 * kNFact + x_length, r, tmp2); 275 | for (int i = 0; i < 2 * kNFact + x_length; ++i) 276 | tmp1[i] = tmp2[2 * kNFact + x_length - i - 1]; 277 | 278 | int nout = x_length / r + 1; 279 | int nbeg = r - r * nout + x_length; 280 | 281 | int count = 0; 282 | for (int i = nbeg; i < x_length + kNFact; i += r) 283 | y[count++] = tmp1[i + kNFact - 1]; 284 | 285 | delete[] tmp1; 286 | delete[] tmp2; 287 | } 288 | 289 | int matlab_round(double x) { 290 | return x > 0 ? 
static_cast(x + 0.5) : static_cast(x - 0.5); 291 | } 292 | 293 | void diff(double *x, int x_length, double *y) { 294 | for (int i = 0; i < x_length - 1; ++i) y[i] = x[i + 1] - x[i]; 295 | } 296 | 297 | void interp1Q(double x, double shift, double *y, int x_length, double *xi, 298 | int xi_length, double *yi) { 299 | double *xi_fraction = new double[xi_length]; 300 | double *delta_y = new double[x_length]; 301 | int *xi_base = new int[xi_length]; 302 | 303 | double delta_x = shift; 304 | for (int i = 0; i < xi_length; ++i) { 305 | xi_base[i] = static_cast((xi[i] - x) / delta_x); 306 | xi_fraction[i] = (xi[i] - x) / delta_x - xi_base[i]; 307 | } 308 | diff(y, x_length, delta_y); 309 | delta_y[x_length - 1] = 0.0; 310 | 311 | for (int i = 0; i < xi_length; ++i) 312 | yi[i] = y[xi_base[i]] + delta_y[xi_base[i]] * xi_fraction[i]; 313 | 314 | delete xi_fraction; 315 | delete xi_base; 316 | delete delta_y; 317 | } 318 | 319 | double randn(void) { 320 | static unsigned int x = 123456789; 321 | static unsigned int y = 362436069; 322 | static unsigned int z = 521288629; 323 | static unsigned int w = 88675123; 324 | unsigned int t; 325 | t = x ^ (x << 11); 326 | x = y; 327 | y = z; 328 | z = w; 329 | 330 | unsigned int tmp = 0; 331 | for (int i = 0; i < 12; ++i) { 332 | t = x ^ (x << 11); 333 | x = y; 334 | y = z; 335 | z = w; 336 | w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); 337 | tmp += w >> 4; 338 | } 339 | return tmp / 268435456.0 - 6.0; 340 | } 341 | 342 | void fast_fftfilt(double *x, int x_length, double *h, int h_length, 343 | int fft_size, ForwardRealFFT *forward_real_fft, 344 | InverseRealFFT *inverse_real_fft, double *y) { 345 | fft_complex *x_spectrum = new fft_complex[fft_size]; 346 | 347 | for (int i = 0; i < x_length; ++i) 348 | forward_real_fft->waveform[i] = x[i] / fft_size; 349 | for (int i = x_length; i < fft_size; ++i) 350 | forward_real_fft->waveform[i] = 0.0; 351 | fft_execute(forward_real_fft->forward_fft); 352 | for (int i = 0; i <= fft_size / 2; ++i) { 353 | 
x_spectrum[i][0] = forward_real_fft->spectrum[i][0]; 354 | x_spectrum[i][1] = forward_real_fft->spectrum[i][1]; 355 | } 356 | 357 | for (int i = 0; i < h_length; ++i) 358 | forward_real_fft->waveform[i] = h[i] / fft_size; 359 | for (int i = h_length; i < fft_size; ++i) 360 | forward_real_fft->waveform[i] = 0.0; 361 | fft_execute(forward_real_fft->forward_fft); 362 | 363 | for (int i = 0; i <= fft_size / 2; ++i) { 364 | inverse_real_fft->spectrum[i][0] = 365 | x_spectrum[i][0] * forward_real_fft->spectrum[i][0] - 366 | x_spectrum[i][1] * forward_real_fft->spectrum[i][1]; 367 | inverse_real_fft->spectrum[i][1] = 368 | x_spectrum[i][0] * forward_real_fft->spectrum[i][1] + 369 | x_spectrum[i][1] * forward_real_fft->spectrum[i][0]; 370 | } 371 | fft_execute(inverse_real_fft->inverse_fft); 372 | 373 | for (int i = 0; i < fft_size; ++i) 374 | y[i] = inverse_real_fft->waveform[i]; 375 | 376 | delete[] x_spectrum; 377 | } 378 | 379 | void inv(double **r, int n, double **invr) { 380 | for (int i = 0; i < n; ++i) 381 | for (int j = 0; j < n; ++j) invr[i][j] = 0.0; 382 | for (int i = 0; i < n; ++i) invr[i][i] = 1.0; 383 | 384 | double tmp; 385 | for (int i = 0; i < n; ++i) { 386 | tmp = r[i][i]; 387 | r[i][i] = 1.0; 388 | for (int j = 0; j <= i; ++j) invr[i][j] /= tmp; 389 | for (int j = i + 1; j < n; ++j) r[i][j] /= tmp; 390 | for (int j = i + 1; j < n; ++j) { 391 | tmp = r[j][i]; 392 | for (int k = 0; k <= i; ++k) invr[j][k] -= invr[i][k] * tmp; 393 | for (int k = i; k < n; ++k) r[j][k] -= r[i][k] * tmp; 394 | } 395 | } 396 | 397 | for (int i = n - 1; i >= 0; --i) { 398 | for (int j = 0; j < i; ++j) { 399 | tmp = r[j][i]; 400 | for (int k = 0; k < n; ++k) invr[j][k] -= invr[i][k] * tmp; 401 | } 402 | } 403 | } 404 | 405 | double matlab_std(double *x, int x_length) { 406 | double average = 0.0; 407 | for (int i = 0; i < x_length; ++i) average += x[i]; 408 | average /= x_length; 409 | 410 | double s = 0.0; 411 | for (int i = 0; i < x_length; ++i) s += pow(x[i] - average, 2.0); 
412 | s /= (x_length - 1); 413 | 414 | return sqrt(s); 415 | } 416 | 417 | void wavwrite(double *x, int x_length, int fs, int nbit, const char *filename) { 418 | FILE *fp = fopen(filename, "wb"); 419 | if (fp == NULL) { 420 | printf("File cannot be opened.\n"); 421 | return; 422 | } 423 | 424 | char text[4] = {'R', 'I', 'F', 'F'}; 425 | uint32_t long_number = 36 + x_length * 2; 426 | fwrite(text, 1, 4, fp); 427 | fwrite(&long_number, 4, 1, fp); 428 | 429 | text[0] = 'W'; 430 | text[1] = 'A'; 431 | text[2] = 'V'; 432 | text[3] = 'E'; 433 | fwrite(text, 1, 4, fp); 434 | text[0] = 'f'; 435 | text[1] = 'm'; 436 | text[2] = 't'; 437 | text[3] = ' '; 438 | fwrite(text, 1, 4, fp); 439 | 440 | long_number = 16; 441 | fwrite(&long_number, 4, 1, fp); 442 | int16_t short_number = 1; 443 | fwrite(&short_number, 2, 1, fp); 444 | short_number = 1; 445 | fwrite(&short_number, 2, 1, fp); 446 | // long_number = static_cast(fs); 447 | long_number = fs; 448 | fwrite(&long_number, 4, 1, fp); 449 | // long_number = static_cast(fs * 2); 450 | long_number = fs * 2; 451 | fwrite(&long_number, 4, 1, fp); 452 | short_number = 2; 453 | fwrite(&short_number, 2, 1, fp); 454 | short_number = 16; 455 | fwrite(&short_number, 2, 1, fp); 456 | 457 | text[0] = 'd'; 458 | text[1] = 'a'; 459 | text[2] = 't'; 460 | text[3] = 'a'; 461 | fwrite(text, 1, 4, fp); 462 | long_number = x_length * 2; 463 | fwrite(&long_number, 4, 1, fp); 464 | 465 | int16_t tmp_signal; 466 | for (int i = 0; i < x_length; ++i) { 467 | tmp_signal = static_cast(MyMax(-32768, 468 | MyMin(32767, static_cast(x[i] * 32767)))); 469 | fwrite(&tmp_signal, 2, 1, fp); 470 | } 471 | 472 | fclose(fp); 473 | } 474 | 475 | double * wavread(const char* filename, int *fs, int *nbit, int *wav_length) { 476 | FILE *fp = fopen(filename, "rb"); 477 | if (NULL == fp) { 478 | printf("File not found.\n"); 479 | return NULL; 480 | } 481 | 482 | if (CheckHeader(fp) == false) { 483 | fclose(fp); 484 | return NULL; 485 | } 486 | 487 | if 
(GetParameters(fp, fs, nbit, wav_length) == false) { 488 | fclose(fp); 489 | return NULL; 490 | } 491 | 492 | double *waveform = new double[*wav_length]; 493 | if (waveform == NULL) return NULL; 494 | 495 | int quantization_byte = *nbit / 8; 496 | double zero_line = pow(2.0, *nbit - 1); 497 | double tmp, sign_bias; 498 | unsigned char for_int_number[4]; 499 | for (int i = 0; i < *wav_length; ++i) { 500 | sign_bias = tmp = 0.0; 501 | fread(for_int_number, 1, quantization_byte, fp); // "data" 502 | if (for_int_number[quantization_byte-1] >= 128) { 503 | sign_bias = pow(2.0, *nbit - 1); 504 | for_int_number[quantization_byte - 1] = 505 | for_int_number[quantization_byte - 1] & 0x7F; 506 | } 507 | for (int j = quantization_byte - 1; j >= 0; --j) 508 | tmp = tmp * 256.0 + for_int_number[j]; 509 | waveform[i] = (tmp - sign_bias) / zero_line; 510 | } 511 | fclose(fp); 512 | return waveform; 513 | } 514 | -------------------------------------------------------------------------------- /SimpleSynthesizer.pro.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ProjectExplorer.Project.ActiveTarget 7 | 0 8 | 9 | 10 | ProjectExplorer.Project.EditorSettings 11 | 12 | true 13 | false 14 | true 15 | 16 | Cpp 17 | 18 | CppGlobal 19 | 20 | 21 | 22 | QmlJS 23 | 24 | QmlJSGlobal 25 | 26 | 27 | 2 28 | UTF-8 29 | false 30 | 4 31 | false 32 | true 33 | 1 34 | true 35 | 0 36 | true 37 | 0 38 | 8 39 | true 40 | 1 41 | true 42 | true 43 | true 44 | false 45 | 46 | 47 | 48 | ProjectExplorer.Project.PluginSettings 49 | 50 | 51 | 52 | ProjectExplorer.Project.Target.0 53 | 54 | Desktop Qt 5.2.0 MinGW 32bit 55 | Desktop Qt 5.2.0 MinGW 32bit 56 | qt.520.win32_mingw48.essentials_kit 57 | 0 58 | 0 59 | 1 60 | 61 | C:/Projects/build-SimpleSynthesizer-Desktop_Qt_5_2_0_MinGW_32bit-Debug 62 | 63 | 64 | true 65 | qmake 66 | 67 | QtProjectManager.QMakeBuildStep 68 | false 69 | true 70 | 71 | false 72 | 73 | 74 | true 75 | Make 76 | 77 | 
Qt4ProjectManager.MakeStep 78 | 79 | false 80 | 81 | 82 | 83 | 2 84 | ビルド 85 | 86 | ProjectExplorer.BuildSteps.Build 87 | 88 | 89 | 90 | true 91 | Make 92 | 93 | Qt4ProjectManager.MakeStep 94 | 95 | true 96 | clean 97 | 98 | 99 | 1 100 | クリーン 101 | 102 | ProjectExplorer.BuildSteps.Clean 103 | 104 | 2 105 | false 106 | 107 | Debug 108 | 109 | Qt4ProjectManager.Qt4BuildConfiguration 110 | 2 111 | true 112 | 113 | 114 | C:/Projects/build-SimpleSynthesizer-Desktop_Qt_5_2_0_MinGW_32bit-Release 115 | 116 | 117 | true 118 | qmake 119 | 120 | QtProjectManager.QMakeBuildStep 121 | false 122 | true 123 | 124 | false 125 | 126 | 127 | true 128 | Make 129 | 130 | Qt4ProjectManager.MakeStep 131 | 132 | false 133 | 134 | 135 | 136 | 2 137 | ビルド 138 | 139 | ProjectExplorer.BuildSteps.Build 140 | 141 | 142 | 143 | true 144 | Make 145 | 146 | Qt4ProjectManager.MakeStep 147 | 148 | true 149 | clean 150 | 151 | 152 | 1 153 | クリーン 154 | 155 | ProjectExplorer.BuildSteps.Clean 156 | 157 | 2 158 | false 159 | 160 | Release 161 | 162 | Qt4ProjectManager.Qt4BuildConfiguration 163 | 0 164 | true 165 | 166 | 2 167 | 168 | 169 | 0 170 | デプロイ 171 | 172 | ProjectExplorer.BuildSteps.Deploy 173 | 174 | 1 175 | ローカルにデプロイ 176 | 177 | ProjectExplorer.DefaultDeployConfiguration 178 | 179 | 1 180 | 181 | 182 | 183 | false 184 | false 185 | false 186 | false 187 | true 188 | 0.01 189 | 10 190 | true 191 | 1 192 | 25 193 | 194 | 1 195 | true 196 | false 197 | true 198 | valgrind 199 | 200 | 0 201 | 1 202 | 2 203 | 3 204 | 4 205 | 5 206 | 6 207 | 7 208 | 8 209 | 9 210 | 10 211 | 11 212 | 12 213 | 13 214 | 14 215 | 216 | 2 217 | 218 | SimpleSynthesizerTest 219 | 220 | Qt4ProjectManager.Qt4RunConfiguration:C:/Projects/SimpleSynthesizer/SimpleSynthesizerTest.pro 221 | 222 | SimpleSynthesizerTest.pro 223 | false 224 | false 225 | 226 | 3768 227 | true 228 | false 229 | false 230 | false 231 | true 232 | 233 | 234 | 235 | false 236 | false 237 | false 238 | false 239 | true 240 | 0.01 241 | 10 242 | true 243 
| 1 244 | 25 245 | 246 | 1 247 | true 248 | false 249 | true 250 | valgrind 251 | 252 | 0 253 | 1 254 | 2 255 | 3 256 | 4 257 | 5 258 | 6 259 | 7 260 | 8 261 | 9 262 | 10 263 | 11 264 | 12 265 | 13 266 | 14 267 | 268 | 2 269 | 270 | SimpleSynthesizerApp 271 | 272 | Qt4ProjectManager.Qt4RunConfiguration:C:/Projects/SimpleSynthesizer/SimpleSynthesizerApp.pro 273 | 274 | SimpleSynthesizerApp.pro 275 | false 276 | true 277 | 278 | 3768 279 | true 280 | false 281 | false 282 | false 283 | true 284 | 285 | 2 286 | 287 | 288 | 289 | ProjectExplorer.Project.TargetCount 290 | 1 291 | 292 | 293 | ProjectExplorer.Project.Updater.EnvironmentId 294 | {9778d425-4e50-4a61-8877-ebb50ba38180} 295 | 296 | 297 | ProjectExplorer.Project.Updater.FileVersion 298 | 15 299 | 300 | 301 | -------------------------------------------------------------------------------- /core/world/tandem_ap.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // Aperiodicity based on TANDEM-STRAIGHT. 6 | // This function would be changed in near future. 7 | //----------------------------------------------------------------------------- 8 | #include 9 | #include 10 | #include 11 | #include "./tandem_ap.h" 12 | #include "./matlabfunctions.h" 13 | #include "./constant_numbers.h" 14 | 15 | const double kNormalCutoff = 600.0; 16 | 17 | // Names of these variables are copied by the source code of Matlab. 18 | // The developer does not know the meaning of these names. 
19 | typedef struct { 20 | int segmentLength; 21 | int nMargin; 22 | double **w; 23 | double *wsqrt; 24 | double **H; 25 | double **Hw; 26 | double **R; 27 | double **invR; 28 | 29 | double *Hwx; 30 | double *a; 31 | double *Ha; 32 | double *wxHa; 33 | double *wx; 34 | } InternalParameters; 35 | 36 | namespace { 37 | 38 | //----------------------------------------------------------------------------- 39 | // SetInternalParameters() allocates the memory to the struct. 40 | //----------------------------------------------------------------------------- 41 | void SetInternalParameters(int segment_length, int n_margin, 42 | InternalParameters *internal_parameters) { 43 | internal_parameters->segmentLength = segment_length; 44 | internal_parameters->nMargin = n_margin; 45 | internal_parameters->w = new double *[segment_length]; 46 | for (int i = 0; i < segment_length; ++i) 47 | internal_parameters->w[i] = new double[segment_length]; 48 | internal_parameters->wsqrt = new double[segment_length]; 49 | internal_parameters->H = new double *[segment_length]; 50 | for (int i = 0; i < segment_length; ++i) 51 | internal_parameters->H[i] = new double[n_margin * 2]; 52 | internal_parameters->Hw = new double *[n_margin * 2]; 53 | for (int i = 0; i < n_margin * 2; ++i) 54 | internal_parameters->Hw[i] = new double[segment_length]; 55 | internal_parameters->R = new double *[n_margin * 2]; 56 | for (int i = 0; i < n_margin * 2; ++i) 57 | internal_parameters->R[i] = new double[n_margin * 2]; 58 | internal_parameters->invR = new double *[n_margin * 2]; 59 | for (int i = 0; i < n_margin * 2; ++i) 60 | internal_parameters->invR[i] = new double[n_margin * 2]; 61 | internal_parameters->Hwx = new double[n_margin * 2]; 62 | internal_parameters->a = new double[n_margin * 2]; 63 | internal_parameters->Ha = new double[segment_length]; 64 | internal_parameters->wxHa = new double[segment_length]; 65 | internal_parameters->wx = new double[segment_length]; 66 | } 67 | 68 | 
//----------------------------------------------------------------------------- 69 | // SetInternalParameters() frees the memory of the struct. 70 | //----------------------------------------------------------------------------- 71 | void DestroyInternalParameters(InternalParameters* internal_parameters) { 72 | delete[] internal_parameters->wsqrt; 73 | delete[] internal_parameters->wx; 74 | delete[] internal_parameters->wxHa; 75 | delete[] internal_parameters->Ha; 76 | delete[] internal_parameters->a; 77 | delete[] internal_parameters->Hwx; 78 | for (int i = 0; i < internal_parameters->nMargin * 2; ++i) 79 | delete[] internal_parameters->invR[i]; 80 | delete[] internal_parameters->invR; 81 | for (int i = 0; i < internal_parameters->nMargin * 2; ++i) 82 | delete[] internal_parameters->R[i]; 83 | delete[] internal_parameters->R; 84 | for (int i = 0; i < internal_parameters->nMargin * 2; ++i) 85 | delete[] internal_parameters->Hw[i]; 86 | delete[] internal_parameters->Hw; 87 | for (int i = 0; i < internal_parameters->segmentLength; ++i) 88 | delete[] internal_parameters->H[i]; 89 | delete[] internal_parameters->H; 90 | for (int i = 0; i < internal_parameters->segmentLength; ++i) 91 | delete[] internal_parameters->w[i]; 92 | delete[] internal_parameters->w; 93 | } 94 | 95 | //----------------------------------------------------------------------------- 96 | // Get*() calculates each parameter. The names do not followe the style guide. 97 | // These names are refered by the article. To avoid the confusion, 98 | // we employed the original names. 
99 | //----------------------------------------------------------------------------- 100 | void GetH(double *x, int x_length, int segment_length, int index_bias, 101 | int current_position_in_sample, int t0_in_samples, double **H) { 102 | int index; 103 | for (int i = -1; i < 2; ++i) { 104 | for (int j = 0; j < segment_length; ++j) { 105 | index = MyMax(0, MyMin(x_length - 1, 106 | i + current_position_in_sample-index_bias - t0_in_samples + j)); 107 | H[j][i + 1] = x[index]; 108 | index = MyMax(0, MyMin(x_length - 1, 109 | i + current_position_in_sample - index_bias + t0_in_samples + j)); 110 | H[j][i + 4] = x[index]; 111 | } 112 | } 113 | } 114 | 115 | void GetHw(double **H, int segment_length, int n_margin2, double **w, 116 | double **Hw) { 117 | double tmp; 118 | for (int i = 0; i < n_margin2; ++i) { 119 | for (int j = 0; j < segment_length; ++j) { 120 | tmp = 0.0; 121 | for (int k = 0; k < segment_length; ++k) tmp += H[k][i] * w[k][j]; 122 | Hw[i][j] = tmp; 123 | } 124 | } 125 | } 126 | 127 | void GetR(double **Hw, int n_margin2, int segment_length, double **H, 128 | double **R) { 129 | double tmp; 130 | for (int i = 0; i < n_margin2; ++i) { 131 | for (int j = 0; j < n_margin2; ++j) { 132 | tmp = 0.0; 133 | for (int k = 0; k < segment_length; ++k) tmp += Hw[i][k] * H[k][j]; 134 | R[i][j] = tmp; 135 | } 136 | } 137 | } 138 | 139 | void GetHwx(double **Hw, int n_margin2, int segment_length, double *x, 140 | int origin, double *Hwx) { 141 | double tmp; 142 | for (int i = 0; i < n_margin2; ++i) { 143 | tmp = 0.0; 144 | for (int j = 0; j < segment_length; ++j) tmp += Hw[i][j]*x[origin+j]; 145 | Hwx[i] = tmp; 146 | } 147 | } 148 | 149 | void Geta(double **invR, int n_margin2, double *Hwx, double *a) { 150 | double tmp; 151 | for (int i = 0; i < n_margin2; ++i) { 152 | tmp = 0.0; 153 | for (int j = 0; j < n_margin2; ++j) tmp += invR[i][j]*Hwx[j]; 154 | a[i] = tmp; 155 | } 156 | } 157 | 158 | void GetHa(double **H, int segment_length, int n_margin2, double *a, 159 | 
double *Ha) { 160 | double tmp; 161 | for (int i = 0; i < segment_length; ++i) { 162 | tmp = 0.0; 163 | for (int j = 0; j < n_margin2; ++j) tmp += H[i][j]*a[j]; 164 | Ha[i] = tmp; 165 | } 166 | } 167 | 168 | void GetW(int segment_length, double **w) { 169 | for (int i = 0; i < segment_length; ++i) 170 | for (int j = 0; j < segment_length; ++j) w[i][j] = 0.0; 171 | 172 | for (int i = 0; i < (segment_length - 1) / 2; ++i) { 173 | w[i][i] = 0.5 - 0.5 * cos((i + 1.0) / 174 | (segment_length + 1.0) * 2.0 * world::kPi); 175 | w[segment_length - i - 1][segment_length - i - 1] = w[i][i]; 176 | } 177 | w[(segment_length - 1) / 2][(segment_length - 1) / 2] = 1.0; 178 | } 179 | 180 | double GetStdwxHa(double *wsqrt, int segment_length, double *x, int origin, 181 | double *Ha, double *wxHa) { 182 | for (int i = 0; i < segment_length; ++i) 183 | wxHa[i] = wsqrt[i] * (x[i + origin] - Ha[i]); 184 | return matlab_std(wxHa, segment_length); 185 | } 186 | 187 | double GetStdwx(double *wsqrt, int segment_length, double *x, int origin, 188 | double *wx) { 189 | for (int i = 0; i < segment_length; ++i) wx[i] = wsqrt[i] * x[i + origin]; 190 | return matlab_std(wx, segment_length); 191 | } 192 | 193 | //----------------------------------------------------------------------------- 194 | // f0PredictionResidualFixSegmentW() calculates the aperiodicity in a frequency 195 | // band. 196 | // This function is only used in BandwiseAperiodicity(). 
197 | //----------------------------------------------------------------------------- 198 | void f0PredictionResidualFixSegmentW(double *x, int x_length, double fs, 199 | double target_f0, double *temporalPositions, double *vuv, int f0_length, 200 | double initial_time, int duration_ms, int current_band, 201 | double **aperiodicity) { 202 | const int kNMargin = 3; 203 | int segment_length = matlab_round(fs * duration_ms / 2000.0) * 2 + 1; 204 | 205 | InternalParameters internal_parameters = {0}; 206 | SetInternalParameters(segment_length, kNMargin, &internal_parameters); 207 | 208 | GetW(segment_length, internal_parameters.w); 209 | for (int i = 0; i < segment_length; ++i) 210 | internal_parameters.wsqrt[i] = sqrt(internal_parameters.w[i][i]); 211 | 212 | int t0_in_samples = matlab_round(fs / target_f0); 213 | int index_bias = matlab_round(fs / target_f0 / 2.0); 214 | 215 | int current_position_in_sample; 216 | int origin; 217 | for (int i = 0; i < f0_length; ++i) { 218 | current_position_in_sample = 219 | matlab_round(-initial_time + temporalPositions[i] * fs) + 1; 220 | if (vuv[i] != 0.0) { 221 | origin = MyMax(0, MyMin(x_length - 1, 222 | current_position_in_sample - index_bias)); 223 | GetH(x, x_length, segment_length, index_bias, 224 | current_position_in_sample, t0_in_samples, internal_parameters.H); 225 | GetHw(internal_parameters.H, segment_length, kNMargin * 2, 226 | internal_parameters.w, internal_parameters.Hw); 227 | GetR(internal_parameters.Hw, kNMargin * 2, segment_length, 228 | internal_parameters.H, internal_parameters.R); 229 | GetHwx(internal_parameters.Hw, kNMargin * 2, segment_length, 230 | x, origin, internal_parameters.Hwx); 231 | inv(internal_parameters.R, kNMargin * 2, internal_parameters.invR); 232 | Geta(internal_parameters.invR, kNMargin * 2, internal_parameters.Hwx, 233 | internal_parameters.a); 234 | GetHa(internal_parameters.H, segment_length, kNMargin * 2, 235 | internal_parameters.a, internal_parameters.Ha); 236 | 
aperiodicity[i][current_band] = GetStdwxHa(internal_parameters.wsqrt, 237 | segment_length, x, origin, internal_parameters.Ha, 238 | internal_parameters.wxHa) / GetStdwx(internal_parameters.wsqrt, 239 | segment_length, x, origin, internal_parameters.wx); 240 | } else { // Aperiodicity does not use if the speech is unvoiced. 241 | aperiodicity[i][current_band] = 0.0; 242 | } 243 | } 244 | DestroyInternalParameters(&internal_parameters); 245 | } 246 | 247 | //----------------------------------------------------------------------------- 248 | // GetQMFpairOfFilters() sets the coefficients of QM filter (hHP:41, hLP:37) 249 | // Although this function requires fs as the input, the result does not depend 250 | // on it. 251 | //----------------------------------------------------------------------------- 252 | void GetQMFpairOfFilters(int fs, double *hHP, double *hLP) { 253 | // hHP 254 | hHP[0] = 0.00041447996898231424; 255 | hHP[1] = 0.00078125051417292477; 256 | hHP[2] = -0.0010917236836275842; 257 | hHP[3] = -0.0019867925675967589; 258 | hHP[4] = 0.0020903896961562292; 259 | hHP[5] = 0.0040940570272849346; 260 | hHP[6] = -0.0034025808529816698; 261 | hHP[7] = -0.0074961541272056016; 262 | hHP[8] = 0.0049722633399330637; 263 | hHP[9] = 0.012738791249119802; 264 | hHP[10] = -0.0066960326895749113; 265 | hHP[11] = -0.020694051570247052; 266 | hHP[12] = 0.0084324365650413451; 267 | hHP[13] = 0.033074383758700532; 268 | hHP[14] = -0.010018936738799522; 269 | hHP[15] = -0.054231361405808247; 270 | hHP[16] = 0.011293988915051487; 271 | hHP[17] = 0.10020081367388213; 272 | hHP[18] = -0.012120546202484579; 273 | hHP[19] = -0.31630021039095702; 274 | hHP[20] = 0.51240682580627639; 275 | hHP[21] = -0.31630021039095702; 276 | hHP[22] = -0.012120546202484579; 277 | hHP[23] = 0.10020081367388213; 278 | hHP[24] = 0.011293988915051487; 279 | hHP[25] = -0.054231361405808247; 280 | hHP[26] = -0.010018936738799522; 281 | hHP[27] = 0.033074383758700532; 282 | hHP[28] = 
0.0084324365650413451; 283 | hHP[29] = -0.020694051570247052; 284 | hHP[30] = -0.0066960326895749113; 285 | hHP[31] = 0.012738791249119802; 286 | hHP[32] = 0.0049722633399330637; 287 | hHP[33] = -0.0074961541272056016; 288 | hHP[34] = -0.0034025808529816698; 289 | hHP[35] = 0.0040940570272849346; 290 | hHP[36] = 0.0020903896961562292; 291 | hHP[37] = -0.0019867925675967589; 292 | hHP[38] = -0.0010917236836275842; 293 | hHP[39] = 0.00078125051417292477; 294 | hHP[40] = 0.00041447996898231424; 295 | 296 | // hLP 297 | hLP[0] = -0.00065488170077483048; 298 | hLP[1] = 0.00007561994958159384; 299 | hLP[2] = 0.0020408456937895227; 300 | hLP[3] = -0.00074680535322030437; 301 | hLP[4] = -0.0043502235688264931; 302 | hLP[5] = 0.0025966428382642732; 303 | hLP[6] = 0.0076396022827566962; 304 | hLP[7] = -0.0064904118901497852; 305 | hLP[8] = -0.011765804538954506; 306 | hLP[9] = 0.013649908479276255; 307 | hLP[10] = 0.01636866479016021; 308 | hLP[11] = -0.026075976030529347; 309 | hLP[12] = -0.020910294856659444; 310 | hLP[13] = 0.048260725032316647; 311 | hLP[14] = 0.024767846611048111; 312 | hLP[15] = -0.096178467583360641; 313 | hLP[16] = -0.027359756709866623; 314 | hLP[17] = 0.31488052161630042; 315 | hLP[18] = 0.52827343594055032; 316 | hLP[19] = 0.31488052161630042; 317 | hLP[20] = -0.027359756709866623; 318 | hLP[21] = -0.096178467583360641; 319 | hLP[22] = 0.024767846611048111; 320 | hLP[23] = 0.048260725032316647; 321 | hLP[24] = -0.020910294856659444; 322 | hLP[25] = -0.026075976030529347; 323 | hLP[26] = 0.01636866479016021; 324 | hLP[27] = 0.013649908479276255; 325 | hLP[28] = -0.011765804538954506; 326 | hLP[29] = -0.0064904118901497852; 327 | hLP[30] = 0.0076396022827566962; 328 | hLP[31] = 0.0025966428382642732; 329 | hLP[32] = -0.0043502235688264931; 330 | hLP[33] = -0.00074680535322030437; 331 | hLP[34] = 0.0020408456937895227; 332 | hLP[35] = 0.00007561994958159384; 333 | hLP[36] = -0.00065488170077483048; 334 | } 335 | 336 | 
//----------------------------------------------------------------------------- 337 | // GetSignalsForAperiodicity() calculates the signals used to calculate the 338 | // aperiodicity. low_signal, high_signal and downsampled_high_signal are 339 | // calculated in this function. 340 | // This function is only used in BandwiseAperiodicity() 341 | //----------------------------------------------------------------------------- 342 | void GetSignalsForAperiodicity(int fft_size, double *whole_signal, 343 | int filtered_signal_length, double *hHP, double *hLP, 344 | double *low_signal, double *high_signal, double *downsampled_high_signal) { 345 | ForwardRealFFT forward_real_fft = {0}; 346 | InverseRealFFT inverse_real_fft = {0}; 347 | InitializeForwardRealFFT(fft_size, &forward_real_fft); 348 | InitializeInverseRealFFT(fft_size, &inverse_real_fft); 349 | fast_fftfilt(whole_signal, filtered_signal_length, hHP, 41, 350 | fft_size, &forward_real_fft, &inverse_real_fft, high_signal); 351 | fast_fftfilt(whole_signal, filtered_signal_length, hLP, 37, 352 | fft_size, &forward_real_fft, &inverse_real_fft, low_signal); 353 | DestroyForwardRealFFT(&forward_real_fft); 354 | DestroyInverseRealFFT(&inverse_real_fft); 355 | for (int j = 0; j < filtered_signal_length; j += 2) 356 | downsampled_high_signal[j / 2] = high_signal[j]; 357 | } 358 | 359 | //----------------------------------------------------------------------------- 360 | // UpdateWholeSignal() updates the whole_signal. 361 | // This function is only used in BandwiseAperiodicity(). 
362 | //----------------------------------------------------------------------------- 363 | inline int UpdateWholeSignal(int filtered_signal_length, int fft_size, 364 | double *low_signal, double *whole_signal) { 365 | for (int i = 0; i < filtered_signal_length; i += 2) 366 | whole_signal[i / 2] = low_signal[i]; 367 | for (int i = matlab_round(filtered_signal_length / 2.0); i < fft_size; i++) 368 | whole_signal[i] = 0.0; 369 | return matlab_round(filtered_signal_length / 2.0) + 82; 370 | } 371 | 372 | //----------------------------------------------------------------------------- 373 | // BandwiseAperiodicity() calculates the aperiodicity in each frequency band. 374 | //----------------------------------------------------------------------------- 375 | void BandwiseAperiodicity(double *x, int x_length, int fs, double *f0, 376 | double *vuv, int f0_length, double *stretched_locations, 377 | int window_length_ms, double **aperiodicity) { 378 | double hHP[41], hLP[37]; 379 | GetQMFpairOfFilters(fs, hHP, hLP); 380 | 381 | int number_of_bands = 382 | static_cast(log(fs / kNormalCutoff) / world::kLog2); 383 | double *cutoff_list = new double[number_of_bands]; 384 | 385 | for (int i = 0; i < number_of_bands; ++i) 386 | cutoff_list[i] = fs / pow(2.0, i + 2.0); 387 | 388 | // 82 = 41 (length of hHP) * 2 389 | int fft_size = GetSuitableFFTSize(x_length + 82); 390 | 391 | double *whole_signal = new double[fft_size]; 392 | double *high_signal = new double[fft_size]; 393 | double *low_signal = new double[fft_size]; 394 | double *downsampled_high_signal = new double[fft_size]; 395 | 396 | int filtered_signal_length = x_length + 82; 397 | 398 | for (int i = 0; i < x_length; ++i) whole_signal[i] = x[i]; 399 | for (int i = x_length; i < fft_size; ++i) whole_signal[i] = 0.0; 400 | 401 | double tmp_fs; 402 | for (int i = 0; i < number_of_bands - 1; ++i) { 403 | tmp_fs = cutoff_list[i] * 2.0; 404 | GetSignalsForAperiodicity(fft_size, whole_signal, filtered_signal_length, 405 | hHP, 
hLP, low_signal, high_signal, downsampled_high_signal); 406 | 407 | f0PredictionResidualFixSegmentW(downsampled_high_signal, 408 | matlab_round(filtered_signal_length / 2.0), tmp_fs, f0[0], 409 | stretched_locations, vuv, f0_length, 41.0 / 2.0 / tmp_fs, 410 | window_length_ms, number_of_bands - i - 1, aperiodicity); 411 | 412 | // subband separation 413 | filtered_signal_length = UpdateWholeSignal(filtered_signal_length, 414 | fft_size, low_signal, whole_signal); 415 | // update the fft size 416 | fft_size = GetSuitableFFTSize(filtered_signal_length); 417 | } 418 | 419 | filtered_signal_length = (filtered_signal_length - 82) * 2; 420 | f0PredictionResidualFixSegmentW(whole_signal, 421 | matlab_round(filtered_signal_length / 2.0), tmp_fs, f0[0], 422 | stretched_locations, vuv, f0_length, 41.0 / 2.0 / tmp_fs, 423 | window_length_ms, 0, aperiodicity); 424 | 425 | delete[] downsampled_high_signal; 426 | delete[] low_signal; 427 | delete[] high_signal; 428 | delete[] whole_signal; 429 | delete[] cutoff_list; 430 | } 431 | 432 | //----------------------------------------------------------------------------- 433 | // GetInterpolatedSignal() carries out the up sampling (target is 4 * fs) 434 | //----------------------------------------------------------------------------- 435 | void GetInterpolatedSignal(double *x, int x_length, double *interpolated_x) { 436 | interpolated_x[0] = x[0] * 0.14644660940672621; 437 | interpolated_x[1] = x[0] * 0.49999999999999994; 438 | interpolated_x[2] = x[0] * 0.85355339059327373; 439 | for (int i = 0; i < x_length - 1; ++i) { 440 | interpolated_x[i * 4 + 3] = x[i]; 441 | interpolated_x[i * 4 + 4] = x[i] * 0.85355339059327373 + 442 | x[i + 1] * 0.14644660940672621; 443 | interpolated_x[i * 4 + 5] = x[i] * 0.49999999999999994 + 444 | x[i + 1] * 0.49999999999999994; 445 | interpolated_x[i * 4 + 6] = x[i] * 0.14644660940672621 + 446 | x[i + 1] * 0.85355339059327373; 447 | } 448 | interpolated_x[(x_length - 1) * 4 + 3] = x[x_length - 1]; 449 | 
interpolated_x[(x_length - 1) * 4 + 4] = 450 | x[x_length - 1] * 0.85355339059327373; 451 | interpolated_x[(x_length - 1) * 4 + 5] = 452 | x[x_length - 1] * 0.49999999999999994; 453 | interpolated_x[(x_length - 1) * 4 + 6] = 454 | x[x_length - 1] * 0.14644660940672621; 455 | interpolated_x[(x_length - 1) * 4 + 7] = 456 | interpolated_x[(x_length - 1) * 4 + 8] = 457 | interpolated_x[(x_length - 1) * 4 + 9] = 0.0; 458 | return; 459 | } 460 | 461 | //----------------------------------------------------------------------------- 462 | // GetNormalizedSignal() calculates the signal that the f0 contour is constant. 463 | //----------------------------------------------------------------------------- 464 | int GetNormalizedSignal(double *x, int x_length, int fs, double *f0, 465 | int f0_length, double frame_period, double target_f0, 466 | double **stretched_signal, double **stretched_locations) { 467 | int ix_length = x_length * 4 + 6; 468 | // int ix_length = x_length * 4; 469 | 470 | double *interpolated_x = new double[ix_length]; 471 | GetInterpolatedSignal(x, x_length, interpolated_x); 472 | 473 | double *original_signal_time_axis = new double[ix_length]; 474 | 475 | for (int i = 0; i < ix_length; ++i) 476 | original_signal_time_axis[i] = i / (fs * 4.0); 477 | 478 | double *base_f0 = new double[f0_length + 1]; 479 | double *base_time_axis = new double[f0_length + 1]; 480 | 481 | for (int i = 0; i < f0_length; ++i) { 482 | base_f0[i] = f0[i] == 0.0 ? 
target_f0 : f0[i]; 483 | base_time_axis[i] = i * frame_period; 484 | } 485 | base_f0[f0_length] = target_f0; 486 | base_time_axis[f0_length] = f0_length * frame_period; 487 | 488 | double *interpolated_f0 = new double[ix_length]; 489 | double *stretched_time_axis = new double[ix_length]; 490 | interp1(base_time_axis, base_f0, f0_length + 1, 491 | original_signal_time_axis, ix_length, interpolated_f0); 492 | 493 | double tmp = target_f0 * fs * 4.0; 494 | stretched_time_axis[0] = interpolated_f0[0] / tmp; 495 | for (int i = 1; i < ix_length; ++i) 496 | stretched_time_axis[i] = stretched_time_axis[i - 1] + 497 | (interpolated_f0[i] / tmp); 498 | 499 | int stretched_signal_length = 500 | static_cast(stretched_time_axis[ix_length - 1] * fs * 4.0) + 1; 501 | double *tmp_time_axis = new double[stretched_signal_length]; 502 | double *stretched_signal4 = new double[stretched_signal_length]; 503 | 504 | for (int i = 0; i < stretched_signal_length; ++i) 505 | tmp_time_axis[i] = i / (fs * 4.0); 506 | interp1(stretched_time_axis, interpolated_x, ix_length, 507 | tmp_time_axis, stretched_signal_length, stretched_signal4); 508 | 509 | *stretched_locations = new double[f0_length]; 510 | interp1(original_signal_time_axis, stretched_time_axis, ix_length, 511 | base_time_axis, f0_length, *stretched_locations); 512 | 513 | // 17 is a safe guard. 
514 | *stretched_signal = new double[stretched_signal_length / 4 + 17]; 515 | decimate(stretched_signal4, stretched_signal_length, 4, *stretched_signal); 516 | 517 | delete[] stretched_signal4; 518 | delete[] tmp_time_axis; 519 | delete[] stretched_time_axis; 520 | delete[] base_f0; 521 | delete[] base_time_axis; 522 | delete[] interpolated_f0; 523 | delete[] original_signal_time_axis; 524 | delete[] interpolated_x; 525 | 526 | return 1 + stretched_signal_length / 4; 527 | } 528 | 529 | } // namespace 530 | 531 | int GetNumberOfBands(int fs) { 532 | return static_cast(log(fs / kNormalCutoff) / world::kLog2); 533 | } 534 | 535 | double AperiodicityRatio(double *x, int x_length, int fs, double *f0, 536 | int f0_length, double frame_period, double **aperiodicity) { 537 | double max_f0 = 0.0; 538 | for (int i = 0; i < f0_length; ++i) 539 | max_f0 = max_f0 > f0[i] ? max_f0 : f0[i]; 540 | 541 | const double kMinimumF0ForNormalization = 32.0; 542 | const double kMaximumF0ForNormalization = 200.0; 543 | double target_f0 = MyMax(kMinimumF0ForNormalization, 544 | MyMin(kMaximumF0ForNormalization, max_f0)); 545 | 546 | // The number of two arraies are unknown. 
547 | double *stretched_signal = NULL; 548 | double *stretched_locations = NULL; 549 | 550 | int normalized_signal_length = GetNormalizedSignal(x, x_length, fs, f0, 551 | f0_length, frame_period / 1000.0, target_f0, &stretched_signal, 552 | &stretched_locations); 553 | 554 | double *stretched_f0 = new double[f0_length]; 555 | for (int i = 0; i < f0_length; ++i) stretched_f0[i] = target_f0; 556 | 557 | BandwiseAperiodicity(stretched_signal, normalized_signal_length, fs, 558 | stretched_f0, f0, f0_length, stretched_locations, 559 | matlab_round(2000.0 / target_f0), aperiodicity); 560 | 561 | delete[] stretched_f0; 562 | delete[] stretched_signal; 563 | delete[] stretched_locations; 564 | 565 | return target_f0; 566 | } 567 | -------------------------------------------------------------------------------- /core/world/dio.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise. All Rights Reserved. 3 | // Author: morise [at] fc.ritsumei.ac.jp (Masanori Morise) 4 | // 5 | // F0 estimation based on DIO (Distributed Inline-filter Operation). 6 | // Please see styleguide.txt to show special rules on names of variables 7 | // and fnctions. 
8 | //----------------------------------------------------------------------------- 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "./dio.h" 14 | #include "./matlabfunctions.h" 15 | #include "./constant_numbers.h" 16 | 17 | //----------------------------------------------------------------------------- 18 | // struct for RawEventByDio() 19 | // "negative" means "zero-crossing point going from positive to negative" 20 | // "positive" means "zero-crossing point going from negative to positive" 21 | //----------------------------------------------------------------------------- 22 | typedef struct { 23 | double *negative_interval_locations; 24 | double *negative_intervals; 25 | int number_of_negatives; 26 | double *positive_interval_locations; 27 | double *positive_intervals; 28 | int number_of_positives; 29 | double *peak_interval_locations; 30 | double *peak_intervals; 31 | int number_of_peaks; 32 | double *dip_interval_locations; 33 | double *dip_intervals; 34 | int number_of_dips; 35 | } ZeroCrossings; 36 | 37 | namespace { 38 | //----------------------------------------------------------------------------- 39 | // GetDownsampledSignal() calculates the spectrum for estimation. 40 | // This function carries out downsampling to speed up the estimation process 41 | // and calculates the spectrum of the downsampled signal. 42 | // This function is only used in the OrigianlDio(). 
43 | //----------------------------------------------------------------------------- 44 | void GetSpectrumForEstimation(double *x, int x_length, int fs, int y_length, 45 | int fft_size, int decimation_ratio, fft_complex *y_spectrum) { 46 | double *y = new double[fft_size]; 47 | 48 | // Downsampling 49 | if (decimation_ratio != 1) { 50 | decimate(x, x_length, decimation_ratio, y); 51 | } else { 52 | for (int i = 0; i < x_length; ++i) y[i] = x[i]; 53 | } 54 | 55 | // Removal of the DC component (y = y - mean value of y) 56 | double mean_y = 0.0; 57 | for (int i = 0; i < y_length; ++i) mean_y += y[i]; 58 | mean_y /= y_length; 59 | for (int i = 0; i < y_length; ++i) y[i] -= mean_y; 60 | for (int i = y_length; i < fft_size; ++i) y[i] = 0.0; 61 | 62 | fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, y, y_spectrum, 63 | FFT_ESTIMATE); 64 | fft_execute(forwardFFT); 65 | 66 | fft_destroy_plan(forwardFFT); 67 | delete[] y; 68 | } 69 | 70 | //----------------------------------------------------------------------------- 71 | // GetBestF0Contour() calculates the best f0 contour based on stabilities of 72 | // all candidates. The F0 whose stability is minimum is selected. 73 | // This function is only used in the OrigianlDio(). 74 | //----------------------------------------------------------------------------- 75 | void GetBestF0Contour(int f0_length, double **f0_candidate_map, 76 | double **f0_stability_map, int number_of_bands, double *best_f0_contour) { 77 | double tmp; 78 | for (int i = 0; i < f0_length; ++i) { 79 | tmp = f0_stability_map[0][i]; 80 | best_f0_contour[i] = f0_candidate_map[0][i]; 81 | for (int j = 1; j < number_of_bands; ++j) { 82 | if (tmp > f0_stability_map[j][i]) { 83 | tmp = f0_stability_map[j][i]; 84 | best_f0_contour[i] = f0_candidate_map[j][i]; 85 | } 86 | } 87 | } 88 | } 89 | 90 | //----------------------------------------------------------------------------- 91 | // EliminateUnnaturalChange() is the 1st step of the postprocessing. 
92 | // This function eliminates the unnatural change of f0 based on allowed_range. 93 | // This function is only used in GetFinalF0Contour(). 94 | //----------------------------------------------------------------------------- 95 | void EliminateUnnaturalChange(double *f0_before, int f0_length, 96 | int voice_range_minimum, double allowed_range, double *best_f0_contour, 97 | double *f0_after) { 98 | // Initialization 99 | for (int i = 0; i < voice_range_minimum; ++i) f0_before[i] = 0.0; 100 | for (int i = voice_range_minimum; i < f0_length - voice_range_minimum; ++i) 101 | f0_before[i] = best_f0_contour[i]; 102 | for (int i = f0_length - voice_range_minimum; i < f0_length; ++i) 103 | f0_before[i] = 0.0; 104 | 105 | // Processing to prevent the jumping of f0 106 | for (int i = 0; i < voice_range_minimum; ++i) f0_after[i] = 0.0; 107 | for (int i = voice_range_minimum; i < f0_length; ++i) 108 | f0_after[i] = fabs((f0_before[i] - f0_before[i - 1]) / 109 | (world::kMySafeGuardMinimum + f0_before[i])) < 110 | allowed_range ? f0_before[i] : 0.0; 111 | } 112 | 113 | //----------------------------------------------------------------------------- 114 | // EliminateSuspectedF0() is the 2nd step of the postprocessing. 115 | // This function eliminates the suspected f0 in the anlaut and auslaut. 116 | // This function is only used in GetFinalF0Contour(). 
//-----------------------------------------------------------------------------
void EliminateSuspectedF0(double *f0_before, int f0_length,
    int voice_range_minimum, double *f0_after) {
  for (int i = 0; i < f0_length; ++i) f0_after[i] = f0_before[i];

  // A frame is kept only if every neighbour within voice_range_minimum - 1
  // frames on both sides is voiced. The scan stops voice_range_minimum frames
  // before the end so that f0_before[i + j] never leaves the array; the
  // remaining tail frames are copied through unchanged.
  const int scan_end = f0_length - voice_range_minimum;
  for (int i = voice_range_minimum; i < scan_end; ++i) {
    for (int j = 1; j < voice_range_minimum; ++j) {
      if (f0_before[i - j] == 0 || f0_before[i + j] == 0) {
        f0_after[i] = 0.0;
        break;
      }
    }
  }
}

//-----------------------------------------------------------------------------
// CountNumberOfVoicedSections() counts the number of voiced sections.
// positive_index receives the index of every frame where the contour turns
// from 0 to non-zero, negative_index the index of the last non-zero frame
// before every return to 0. The counts are stored through the two pointers.
// This function is only used in GetFinalF0Contour().
//-----------------------------------------------------------------------------
void CountNumberOfVoicedSections(double *f0_after, int f0_length,
    int *positive_index, int *negative_index, int *positive_count,
    int *negative_count) {
  int onsets = 0;
  int offsets = 0;
  for (int i = 1; i < f0_length; ++i) {
    const bool previous_voiced = f0_after[i - 1] != 0;
    const bool current_voiced = f0_after[i] != 0;
    if (previous_voiced && !current_voiced) {
      negative_index[offsets++] = i - 1;
    } else if (!previous_voiced && current_voiced) {
      positive_index[onsets++] = i;
    }
  }
  *positive_count = onsets;
  *negative_count = offsets;
}

//-----------------------------------------------------------------------------
// CorrectOneF0() corrects the f0[current_index] based on
// f0[current_index + sign].
// This function is only used in ForwardCorrection() and BackwardCorrection().
154 | //----------------------------------------------------------------------------- 155 | bool CorrectOneF0(double **f0_map, int number_of_candidates, 156 | double allowed_range, int current_index, int sign, double *f0_after) { 157 | double reference_value1 = f0_after[current_index] * 2 - 158 | f0_after[current_index + sign]; 159 | double reference_value2 = f0_after[current_index]; 160 | double minimum_error = MyMin(fabs(reference_value1 - 161 | f0_map[0][current_index + sign]), 162 | fabs(reference_value2 - f0_map[0][current_index + sign])); 163 | double error_value; 164 | for (int i = 1; i < number_of_candidates; ++i) { 165 | error_value = 166 | MyMin(fabs(reference_value1 - f0_map[i][current_index + sign]), 167 | fabs(reference_value2 - f0_map[i][current_index + sign])); 168 | if (error_value < minimum_error) { 169 | minimum_error = error_value; 170 | f0_after[current_index + sign] = f0_map[i][current_index + sign]; 171 | } 172 | } 173 | if (MyMin(minimum_error / (reference_value1 + world::kMySafeGuardMinimum), 174 | minimum_error / (reference_value2 + world::kMySafeGuardMinimum)) > 175 | allowed_range) { 176 | f0_after[current_index + sign] = 0.0; 177 | return false; 178 | } 179 | return true; 180 | } 181 | 182 | //----------------------------------------------------------------------------- 183 | // ForwardCorrection() is the 4th step of the postprocessing. 184 | // This function corrects the f0 candidates from backward to forward. 185 | // This function is only used in GetFinalF0Contour(). 
186 | //----------------------------------------------------------------------------- 187 | void ForwardCorrection(double *f0_before, int f0_length, double **f0_map, 188 | int number_of_candidates, double allowed_range, int *positive_index, 189 | int *negative_index, int negative_count, double *f0_after) { 190 | for (int i = 0; i < f0_length; i++) f0_after[i] = f0_before[i]; 191 | 192 | for (int i = 0; i < negative_count; ++i) { 193 | for (int j = negative_index[i]; j < f0_length - 1; ++j) { 194 | if (false == CorrectOneF0(f0_map, number_of_candidates, allowed_range, 195 | j, 1, f0_after)) break; 196 | if (i != negative_count && j == positive_index[i + 1] - 1) { 197 | negative_index[j] = j; 198 | break; 199 | } 200 | } 201 | } 202 | } 203 | 204 | //----------------------------------------------------------------------------- 205 | // BackwardCorrection() is the 5th step of the postprocessing. 206 | // This function corrects the f0 candidates from forward to backward. 207 | // This function is only used in GetFinalF0Contour(). 208 | //----------------------------------------------------------------------------- 209 | void BackwardCorrection(double *f0_before, int f0_length, double **f0_map, 210 | int number_of_candidates, double allowed_range, int *positive_index, 211 | int *negative_index, int positive_count, double *f0_after) { 212 | for (int i = 0; i < f0_length; ++i) f0_after[i] = f0_before[i]; 213 | 214 | for (int i = positive_count - 1; i >= 0; --i) { 215 | for (int j = positive_index[i] + 1; j > 1; --j) { 216 | if (false == CorrectOneF0(f0_map, number_of_candidates, allowed_range, 217 | j, -1, f0_after)) break; 218 | if (i != 0 && j == negative_index[i - 1] + 1) { 219 | positive_index[j] = j; 220 | break; 221 | } 222 | } 223 | } 224 | } 225 | 226 | //----------------------------------------------------------------------------- 227 | // EliminateInvalidVoicedSection() is the final step of the postprocessing. 
228 | // This function eliminates the voiced section whose the 229 | // duration is under 50 msec. 230 | // This function is only used in GetFinalF0Contour(). 231 | //----------------------------------------------------------------------------- 232 | void EliminateInvalidVoicedSection(double *f0_before, int f0_length, 233 | int voice_range_minimum, double *f0_after) { 234 | for (int i = 0; i < f0_length; ++i) f0_after[i] = f0_before[i]; 235 | 236 | int current_index; 237 | for (int i = 0; i < f0_length; ++i) { 238 | if (f0_before[i] == 0.0) continue; 239 | current_index = i; 240 | for (int j = current_index; j < f0_length; ++j) 241 | if (f0_before[j] == 0.0) { 242 | i = j; 243 | break; 244 | } 245 | if ((i - current_index) > voice_range_minimum) continue; 246 | for (int j = i; j >= current_index; --j) f0_after[j] = 0.0; 247 | } 248 | } 249 | 250 | //----------------------------------------------------------------------------- 251 | // GetFinalF0Contour() calculates the optimal f0 contour based on all f0 252 | // candidates. This is the processing after GetBestF0Contour(). 253 | // This function is only used in OriginalDio(). 254 | //----------------------------------------------------------------------------- 255 | void GetFinalF0Contour(double frame_period, int number_of_candidates, int fs, 256 | double **f0_map, double *best_f0_contour, int f0_length, 257 | double *final_f0_contour) { 258 | // memo: 259 | // First and lat 50 msec are not used as the voiced section. 260 | int voice_range_minimum = static_cast(0.5 + 50.0 / frame_period); 261 | // memo: 262 | // This is the tentative value. 
263 | double allowed_range = 0.1 * frame_period / 5.0; 264 | 265 | double *f0_tmp1 = new double[f0_length]; 266 | double *f0_tmp2 = new double[f0_length]; 267 | 268 | EliminateUnnaturalChange(f0_tmp1, f0_length, voice_range_minimum, 269 | allowed_range, best_f0_contour, f0_tmp2); 270 | int *positive_index = new int[f0_length]; 271 | int *negative_index = new int[f0_length]; 272 | 273 | EliminateSuspectedF0(f0_tmp2, f0_length, voice_range_minimum, f0_tmp1); 274 | int positive_count, negative_count; 275 | CountNumberOfVoicedSections(f0_tmp1, f0_length, positive_index, 276 | negative_index, &positive_count, &negative_count); 277 | ForwardCorrection(f0_tmp1, f0_length, f0_map, number_of_candidates, 278 | allowed_range, positive_index, negative_index, negative_count, f0_tmp2); 279 | BackwardCorrection(f0_tmp2, f0_length, f0_map, number_of_candidates, 280 | allowed_range, positive_index, negative_index, positive_count, f0_tmp1); 281 | EliminateInvalidVoicedSection(f0_tmp1, f0_length, voice_range_minimum, 282 | final_f0_contour); 283 | 284 | delete[] f0_tmp1; 285 | delete[] f0_tmp2; 286 | delete[] positive_index; 287 | delete[] negative_index; 288 | } 289 | 290 | //----------------------------------------------------------------------------- 291 | // NuttallWindow() calculates the coefficients of Nuttall window whose length 292 | // is y_length. 
293 | //----------------------------------------------------------------------------- 294 | void NuttallWindow(int y_length, double *y) { 295 | double tmp; 296 | for (int i = 0; i < y_length; ++i) { 297 | tmp = (i + 1 - (y_length + 1) / 2.0) / (y_length + 1); 298 | y[i] = 0.355768 + 0.487396 * cos(2 * world::kPi * tmp) + 299 | 0.144232 * cos(4.0 * world::kPi * tmp) + 300 | 0.012604 * cos(6.0 * world::kPi * tmp); 301 | } 302 | } 303 | 304 | //----------------------------------------------------------------------------- 305 | // GetFilteredSignal() calculates the signal that is the convolution of the 306 | // input signal and low-pass filter. 307 | // This function is only used in RawEventByDio() 308 | //----------------------------------------------------------------------------- 309 | void GetFilteredSignal(int half_average_length, int fft_size, 310 | fft_complex *x_spectrum, int x_length, double *filtered_signal) { 311 | double *low_pass_filter = new double[fft_size]; 312 | for (int i = half_average_length * 2; i < fft_size; ++i) 313 | low_pass_filter[i] = 0.0; 314 | // Nuttall window is used as a low-pass filter. 315 | // Cutoff frequency depends on the window length. 
316 | NuttallWindow(half_average_length * 4, low_pass_filter); 317 | 318 | fft_complex *low_pass_filter_spectrum = new fft_complex[fft_size]; 319 | fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, low_pass_filter, 320 | low_pass_filter_spectrum, FFT_ESTIMATE); 321 | fft_execute(forwardFFT); 322 | 323 | // Convolution 324 | double tmp = x_spectrum[0][0] * low_pass_filter_spectrum[0][0] - 325 | x_spectrum[0][1] * low_pass_filter_spectrum[0][1]; 326 | low_pass_filter_spectrum[0][1] = 327 | x_spectrum[0][0] * low_pass_filter_spectrum[0][1] + 328 | x_spectrum[0][1] * low_pass_filter_spectrum[0][0]; 329 | low_pass_filter_spectrum[0][0] = tmp; 330 | for (int i = 1; i <= fft_size / 2; ++i) { 331 | tmp = x_spectrum[i][0] * low_pass_filter_spectrum[i][0] - 332 | x_spectrum[i][1] * low_pass_filter_spectrum[i][1]; 333 | low_pass_filter_spectrum[i][1] = 334 | x_spectrum[i][0] * low_pass_filter_spectrum[i][1] + 335 | x_spectrum[i][1] * low_pass_filter_spectrum[i][0]; 336 | low_pass_filter_spectrum[i][0] = tmp; 337 | low_pass_filter_spectrum[fft_size - i - 1][0] = 338 | low_pass_filter_spectrum[i][0]; 339 | low_pass_filter_spectrum[fft_size - i - 1][1] = 340 | low_pass_filter_spectrum[i][1]; 341 | } 342 | 343 | fft_plan inverseFFT = fft_plan_dft_c2r_1d(fft_size, 344 | low_pass_filter_spectrum, filtered_signal, FFT_ESTIMATE); 345 | fft_execute(inverseFFT); 346 | 347 | // Compensation of the delay. 348 | int index_bias = half_average_length * 2; 349 | for (int i = 0; i < x_length; ++i) 350 | filtered_signal[i] = filtered_signal[i + index_bias]; 351 | 352 | fft_destroy_plan(inverseFFT); 353 | fft_destroy_plan(forwardFFT); 354 | delete[] low_pass_filter_spectrum; 355 | delete[] low_pass_filter; 356 | } 357 | 358 | //----------------------------------------------------------------------------- 359 | // CheckEvent() returns 1, provided that the input value is over 1. 360 | // This function is for RawEventByDio(). 
//-----------------------------------------------------------------------------
inline int CheckEvent(int x) {
  return x > 0 ? 1 : 0;
}

//-----------------------------------------------------------------------------
// ZeroCrossingEngine() calculates the zero crossing points from positive to
// negative. Thanks to Custom.Maid http://custom-made.seesaa.net/ (2012/8/19)
// For every pair of consecutive crossings it stores the inverse of their
// distance (scaled by fs) in intervals[] and their midpoint (divided by fs) in
// interval_locations[]. The return value is the number of entries written,
// which is one less than the number of crossings, or 0 when fewer than two
// crossings exist.
// This function is only used in RawEventByDio().
//-----------------------------------------------------------------------------
int ZeroCrossingEngine(double *x, int x_length, double fs,
    double *interval_locations, double *intervals) {
  // At least two samples are needed to observe a single crossing.
  if (x_length < 2) return 0;

  // edges[k] is the index of the first non-positive sample that follows a
  // positive one.
  int *edges = new int[x_length];
  int count = 0;
  for (int i = 0; i < x_length - 1; ++i)
    if (0.0 < x[i] && x[i + 1] <= 0.0) edges[count++] = i + 1;

  if (count < 2) {
    delete[] edges;
    return 0;
  }

  // Refine each crossing to sub-sample precision by linear interpolation
  // between the two samples that bracket it.
  double *fine_edges = new double[count];
  for (int i = 0; i < count; ++i)
    fine_edges[i] =
        edges[i] - x[edges[i] - 1] / (x[edges[i]] - x[edges[i] - 1]);

  const int number_of_intervals = count - 1;
  for (int i = 0; i < number_of_intervals; ++i) {
    intervals[i] = fs / (fine_edges[i + 1] - fine_edges[i]);
    interval_locations[i] = (fine_edges[i] + fine_edges[i + 1]) / 2.0 / fs;
  }

  delete[] fine_edges;
  delete[] edges;
  return number_of_intervals;
}

//-----------------------------------------------------------------------------
// GetFourZeroCrossingIntervals() calculates four zero-crossing intervals.
// (1) Zero-crossing going from negative to positive.
410 | // (2) Zero-crossing going from positive to negative. 411 | // (3) Peak, and (4) dip. (3) and (4) are calculated from the zero-crossings of 412 | // the differential of waveform. 413 | //----------------------------------------------------------------------------- 414 | void GetFourZeroCrossingIntervals(double *filtered_signal, int x_length, 415 | double fs, ZeroCrossings *zero_crossings) { 416 | const int kMiximumNumber = x_length / 4; 417 | zero_crossings->negative_interval_locations = new double[kMiximumNumber]; 418 | zero_crossings->positive_interval_locations = new double[kMiximumNumber]; 419 | zero_crossings->peak_interval_locations = new double[kMiximumNumber]; 420 | zero_crossings->dip_interval_locations = new double[kMiximumNumber]; 421 | zero_crossings->negative_intervals = new double[kMiximumNumber]; 422 | zero_crossings->positive_intervals = new double[kMiximumNumber]; 423 | zero_crossings->peak_intervals = new double[kMiximumNumber]; 424 | zero_crossings->dip_intervals = new double[kMiximumNumber]; 425 | 426 | zero_crossings->number_of_negatives = ZeroCrossingEngine(filtered_signal, 427 | x_length, fs, zero_crossings->negative_interval_locations, 428 | zero_crossings->negative_intervals); 429 | 430 | for (int i = 0; i < x_length; ++i) filtered_signal[i] = -filtered_signal[i]; 431 | zero_crossings->number_of_positives = ZeroCrossingEngine(filtered_signal, 432 | x_length, fs, zero_crossings->positive_interval_locations, 433 | zero_crossings->positive_intervals); 434 | 435 | for (int i = 0; i < x_length - 1; ++i) filtered_signal[i] = 436 | filtered_signal[i] - filtered_signal[i + 1]; 437 | zero_crossings->number_of_peaks = ZeroCrossingEngine(filtered_signal, 438 | x_length - 1, fs, zero_crossings->peak_interval_locations, 439 | zero_crossings->peak_intervals); 440 | 441 | for (int i = 0; i < x_length - 1; ++i) 442 | filtered_signal[i] = -filtered_signal[i]; 443 | zero_crossings->number_of_dips = ZeroCrossingEngine(filtered_signal, 444 | x_length - 
1, fs, zero_crossings->dip_interval_locations, 445 | zero_crossings->dip_intervals); 446 | } 447 | 448 | //----------------------------------------------------------------------------- 449 | // GetF0CandidatesSub() calculates the f0 candidates and deviations. 450 | // This is the sub-function of GetF0Candidates() and assumes the calculation. 451 | //----------------------------------------------------------------------------- 452 | void GetF0CandidatesSub(double **interpolated_f0_set, int time_axis_length, 453 | double f0_floor, double f0_ceil, double boundary_f0, 454 | double *f0_candidates, double *f0_deviations) { 455 | for (int i = 0; i < time_axis_length; ++i) { 456 | f0_candidates[i] = (interpolated_f0_set[0][i] + 457 | interpolated_f0_set[1][i] + interpolated_f0_set[2][i] + 458 | interpolated_f0_set[3][i]) / 4.0; 459 | 460 | f0_deviations[i] = sqrt(( 461 | (interpolated_f0_set[0][i] - f0_candidates[i]) * 462 | (interpolated_f0_set[0][i] - f0_candidates[i]) + 463 | (interpolated_f0_set[1][i] - f0_candidates[i]) * 464 | (interpolated_f0_set[1][i] - f0_candidates[i]) + 465 | (interpolated_f0_set[2][i] - f0_candidates[i]) * 466 | (interpolated_f0_set[2][i] - f0_candidates[i]) + 467 | (interpolated_f0_set[3][i] - f0_candidates[i]) * 468 | (interpolated_f0_set[3][i] - f0_candidates[i])) / 3.0); 469 | 470 | if (f0_candidates[i] > boundary_f0 || 471 | f0_candidates[i] < boundary_f0 / 2.0 || 472 | f0_candidates[i] > f0_ceil || f0_candidates[i] < f0_floor) { 473 | f0_candidates[i] = 0.0; 474 | f0_deviations[i] = world::kMaximumValue; 475 | } 476 | } 477 | } 478 | 479 | //----------------------------------------------------------------------------- 480 | // GetF0Candidates() calculates the F0 candidates based on the zero-crossings. 481 | // Calculation of F0 candidates is carried out in GetF0CandidatesSub(). 
482 | //----------------------------------------------------------------------------- 483 | void GetF0Candidates(const ZeroCrossings *zero_crossings, double boundary_f0, 484 | double f0_floor, double f0_ceil, double *time_axis, int time_axis_length, 485 | double *f0_candidates, double *f0_deviations) { 486 | if (0 == CheckEvent(zero_crossings->number_of_negatives - 2) * 487 | CheckEvent(zero_crossings->number_of_positives - 2) * 488 | CheckEvent(zero_crossings->number_of_peaks - 2) * 489 | CheckEvent(zero_crossings->number_of_dips - 2)) { 490 | for (int i = 0; i < time_axis_length; ++i) { 491 | f0_deviations[i] = world::kMaximumValue; 492 | f0_candidates[i] = 0.0; 493 | } 494 | return; 495 | } 496 | 497 | double *interpolated_f0_set[4]; 498 | for (int i = 0; i < 4; ++i) 499 | interpolated_f0_set[i] = new double[time_axis_length]; 500 | 501 | interp1(zero_crossings->negative_interval_locations, 502 | zero_crossings->negative_intervals, 503 | zero_crossings->number_of_negatives, 504 | time_axis, time_axis_length, interpolated_f0_set[0]); 505 | interp1(zero_crossings->positive_interval_locations, 506 | zero_crossings->positive_intervals, 507 | zero_crossings->number_of_positives, 508 | time_axis, time_axis_length, interpolated_f0_set[1]); 509 | interp1(zero_crossings->peak_interval_locations, 510 | zero_crossings->peak_intervals, zero_crossings->number_of_peaks, 511 | time_axis, time_axis_length, interpolated_f0_set[2]); 512 | interp1(zero_crossings->dip_interval_locations, 513 | zero_crossings->dip_intervals, zero_crossings->number_of_dips, 514 | time_axis, time_axis_length, interpolated_f0_set[3]); 515 | 516 | GetF0CandidatesSub(interpolated_f0_set, time_axis_length, f0_floor, 517 | f0_ceil, boundary_f0, f0_candidates, f0_deviations); 518 | for (int i = 0; i < 4; ++i) delete[] interpolated_f0_set[i]; 519 | } 520 | 521 | //----------------------------------------------------------------------------- 522 | // DestroyZeroCrossings() frees the memory of array in the 
struct 523 | //----------------------------------------------------------------------------- 524 | void DestroyZeroCrossings(ZeroCrossings *zero_crossings) { 525 | delete[] zero_crossings->negative_interval_locations; 526 | delete[] zero_crossings->positive_interval_locations; 527 | delete[] zero_crossings->peak_interval_locations; 528 | delete[] zero_crossings->dip_interval_locations; 529 | delete[] zero_crossings->negative_intervals; 530 | delete[] zero_crossings->positive_intervals; 531 | delete[] zero_crossings->peak_intervals; 532 | delete[] zero_crossings->dip_intervals; 533 | } 534 | 535 | //----------------------------------------------------------------------------- 536 | // RawEventByDio() calculates the zero-crossings. 537 | // This function is only used in OriginalDio(). 538 | //----------------------------------------------------------------------------- 539 | void RawEventByDio(double boundary_f0, double fs, fft_complex *x_spectrum, 540 | int x_length, int fft_size, double f0_floor, double f0_ceil, 541 | double *time_axis, int time_axis_length, double *f0_deviations, 542 | double *f0_candidates) { 543 | double *filtered_signal = new double[fft_size]; 544 | GetFilteredSignal(matlab_round(fs / boundary_f0 / 2.0), fft_size, x_spectrum, 545 | x_length, filtered_signal); 546 | 547 | ZeroCrossings zero_crossings = {0}; 548 | GetFourZeroCrossingIntervals(filtered_signal, x_length, fs, 549 | &zero_crossings); 550 | 551 | GetF0Candidates(&zero_crossings, boundary_f0, f0_floor, f0_ceil, 552 | time_axis, time_axis_length, f0_candidates, f0_deviations); 553 | 554 | DestroyZeroCrossings(&zero_crossings); 555 | delete[] filtered_signal; 556 | } 557 | 558 | //----------------------------------------------------------------------------- 559 | // GetF0CandidateAndStabilityMap() calculates all f0 candidates and 560 | // their stabilities. 561 | // This function is only used in the OrigianlDio(). 
562 | //----------------------------------------------------------------------------- 563 | void GetF0CandidateAndStabilityMap(double *boundary_f0_list, 564 | int number_of_bands, double fs_after_downsampling, int y_length, 565 | double *time_axis, int f0_length, fft_complex *y_spectrum, 566 | int fft_size, double f0_floor, double f0_ceil, 567 | double **f0_candidate_map, double **f0_stability_map) { 568 | double * f0_candidates = new double[f0_length]; 569 | double * f0_deviations = new double[f0_length]; 570 | 571 | // Calculation of the acoustics events (zero-crossing) 572 | for (int i = 0; i < number_of_bands; ++i) { 573 | RawEventByDio(boundary_f0_list[i], fs_after_downsampling, y_spectrum, 574 | y_length, fft_size, f0_floor, f0_ceil, time_axis, f0_length, 575 | f0_deviations, f0_candidates); 576 | for (int j = 0; j < f0_length; ++j) { 577 | // A way to avoid zero division 578 | f0_stability_map[i][j] = f0_deviations[j] / 579 | (f0_candidates[j] + world::kMySafeGuardMinimum); 580 | f0_candidate_map[i][j] = f0_candidates[j]; 581 | } 582 | } 583 | delete[] f0_candidates; 584 | delete[] f0_deviations; 585 | } 586 | 587 | //----------------------------------------------------------------------------- 588 | // OriginalDio() estimates the F0 based on Distributed Inline-filter Operation. 
589 | //----------------------------------------------------------------------------- 590 | void OriginalDio(double *x, int x_length, int fs, double frame_period, 591 | double f0_floor, double f0_ceil, double channels_in_octave, int speed, 592 | double *time_axis, double *f0) { 593 | // Calculation of fundamental parameters 594 | // Debug 2012/09/09 0.1.2 595 | // int number_of_bands = 1 + static_cast(log(f0_ceil / f0_floor) 596 | // / world::kLog2 * channels_in_octave); 597 | int number_of_bands = 2 + static_cast(log(f0_ceil / f0_floor) / 598 | world::kLog2 * channels_in_octave); 599 | double * boundary_f0_list = new double[number_of_bands]; 600 | for (int i = 0; i < number_of_bands; ++i) 601 | boundary_f0_list[i] = f0_floor * pow(2.0, i / channels_in_octave); 602 | 603 | // normalization 604 | int decimation_ratio = MyMax(MyMin(speed, 12), 1); 605 | int y_length = (1 + static_cast(x_length / decimation_ratio)); 606 | int fft_size = GetSuitableFFTSize(y_length + 607 | (4 * static_cast(1.0 + fs / boundary_f0_list[0] / 2.0))); 608 | 609 | // Calculation of the spectrum used for the f0 estimation 610 | fft_complex *y_spectrum = new fft_complex[fft_size]; 611 | GetSpectrumForEstimation(x, x_length, fs, y_length, fft_size, 612 | decimation_ratio, y_spectrum); 613 | 614 | // f0map represents all F0 candidates. We can modify them. 
615 | double **f0_candidate_map = new double *[number_of_bands]; 616 | double **f0_stability_map = new double *[number_of_bands]; 617 | int f0_length = GetSamplesForDIO(fs, x_length, frame_period); 618 | for (int i = 0; i < number_of_bands; ++i) { 619 | f0_candidate_map[i] = new double[f0_length]; 620 | f0_stability_map[i] = new double[f0_length]; 621 | } 622 | 623 | for (int i = 0; i < f0_length; ++i) 624 | time_axis[i] = i * frame_period / 1000.0; 625 | 626 | double fs_after_downsampling = static_cast(fs) / decimation_ratio; 627 | GetF0CandidateAndStabilityMap(boundary_f0_list, number_of_bands, 628 | fs_after_downsampling, y_length, time_axis, f0_length, y_spectrum, 629 | fft_size, f0_floor, f0_ceil, f0_candidate_map, f0_stability_map); 630 | 631 | // Selection of the best value based on fundamental-ness. 632 | double *best_f0_contour = new double[f0_length]; 633 | GetBestF0Contour(f0_length, f0_candidate_map, f0_stability_map, 634 | number_of_bands, best_f0_contour); 635 | 636 | // Postprocessing to find the best f0-contour. 
637 | GetFinalF0Contour(frame_period, number_of_bands, fs, f0_candidate_map, 638 | best_f0_contour, f0_length, f0); 639 | 640 | delete[] best_f0_contour; 641 | delete[] y_spectrum; 642 | for (int i = 0; i < number_of_bands; ++i) { 643 | delete[] f0_stability_map[i]; 644 | delete[] f0_candidate_map[i]; 645 | } 646 | delete[] f0_stability_map; 647 | delete[] f0_candidate_map; 648 | delete[] boundary_f0_list; 649 | } 650 | 651 | } // namespace 652 | 653 | int GetSamplesForDIO(int fs, int x_length, double frame_period) { 654 | return static_cast(x_length / static_cast(fs) / 655 | (frame_period / 1000.0)) + 1; 656 | } 657 | 658 | void Dio(double *x, int x_length, int fs, const DioOption option, 659 | double *time_axis, double *f0) { 660 | OriginalDio(x, x_length, fs, option.frame_period, option.f0_floor, 661 | option.f0_ceil, option.channels_in_octave, option.speed, time_axis, f0); 662 | } 663 | 664 | void Dio(double *x, int x_length, int fs, double frame_period, 665 | double *time_axis, double *f0) { 666 | const double kTargetFs = 4000.0; 667 | const double kF0Floor = 80.0; 668 | const double kF0Ceil = 640; 669 | const double kChannelsInOctave = 2.0; 670 | const int kDecimationRatio = static_cast(fs / kTargetFs); 671 | 672 | OriginalDio(x, x_length, fs, frame_period, kF0Floor, kF0Ceil, 673 | kChannelsInOctave, kDecimationRatio, time_axis, f0); 674 | } 675 | 676 | void InitializeDioOption(DioOption *option) { 677 | // You can change default parameters. 678 | option->channels_in_octave = 2.0; 679 | option->f0_ceil = 640.0; 680 | option->f0_floor = 80.0; 681 | option->frame_period = 5; 682 | // You can use from 1 to 12. 683 | // Default value for 44.1 kHz of fs. 684 | option->speed = 11; 685 | } 686 | --------------------------------------------------------------------------------