├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── data
    └── 1_0_0_0_0_0_0_0.wav
├── include
    ├── add-deltas.h
    ├── am-diag-gmm.h
    ├── common.h
    ├── compressed-matrix.h
    ├── compute-cmvn-stats.h
    ├── feature-mfcc.h
    ├── fstreader.h
    ├── simple-decoder.h
    ├── srfft.h
    ├── transition-model.h
    └── wavereader.h
├── model
    ├── HCLG.fst
    └── final.mdl
└── src
    ├── add-deltas.cpp
    ├── am-diag-gmm.cpp
    ├── common.cpp
    ├── compressed-matrix.cpp
    ├── compute-cmvn-stats.cpp
    ├── decode.cpp
    ├── feature-mfcc.cpp
    ├── fstreader.cpp
    ├── simple-decoder.cpp
    ├── srfft.cpp
    ├── transition-model.cpp
    └── wavereader.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds bin/main from every src/*.cpp; object files go to obj/.
 2 | CXX = g++
 3 | CXXFLAGS = -std=c++11 --debug
 4 | INCLUDE	= -I/usr/local/include -I./include
 5 | LDFLAGS = -std=c++11
 6 | LDLIBS	= -lpthread -lm
 7 | EXECUTABLE= bin/main
 8 | SOURCES = $(wildcard src/*.cpp)
 9 | # Headers live in include/ (not includes/); the old glob always expanded to nothing.
10 | HEADERS = $(wildcard include/*.h)
11 | OBJECTS = $(patsubst src/%.cpp, obj/%.o, $(SOURCES))
12 | 
13 | BASE = $(USER)
14 | 
15 | # Neither target names a real file.
16 | .PHONY: all clean
17 | 
18 | all: $(EXECUTABLE)
19 | 
20 | # Libraries come after the objects so the linker can resolve their symbols;
21 | # the output directory is created on demand because bin/ is not in the repo.
22 | $(EXECUTABLE): $(OBJECTS)
23 | 	@mkdir -p $(@D)
24 | 	$(CXX) $(LDFLAGS) $^ $(LDLIBS) -o $@
25 | 
26 | # Rebuild an object when its source or any header changes; obj/ is created on demand.
27 | obj/%.o: src/%.cpp $(HEADERS)
28 | 	@mkdir -p $(@D)
29 | 	$(CXX) -c $(CXXFLAGS) $(INCLUDE) $< -o $@
30 | 
31 | # -f keeps clean from failing when nothing has been built yet.
32 | clean:
33 | 	rm -f $(OBJECTS) $(EXECUTABLE)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # asr-decode
 2 | 
 3 | ## 从 [Kaldi](https://github.com/kaldi-asr/kaldi) 中裁剪的解码推理框架
 4 | 
 5 | ## 实现
 6 | 1. 不依赖OpenFST、OpenBLAS等库实现全部计算，便于学习和移植
 7 | 2. 重现了基础的Viterbi解码（参见 [gmm-decode-simple.cc](https://github.com/kaldi-asr/kaldi/blob/master/src/gmmbin/gmm-decode-simple.cc)）
 8 | 
 9 | ## 使用
10 | ```shell
11 | ./bin/main ./model/final.mdl ./model/HCLG.fst ./data/1_0_0_0_0_0_0_0.wav
12 | ```
13 | 备注:
14 | 1. model文件来源于yesno的基础示例
15 | 2. 从音频计算feature的过程等价于下面过程
16 | ```
17 | #从wave计算mfcc(包含一次compress)
18 | kaldi/src/featbin/compute-mfcc-feats --config=conf/mfcc.conf scp:data/test_yesno/wav.scp ark:- | kaldi/src/featbin/copy-feats --compress=true ark:- ark,scp:test_yesno.ark,test_yesno.scp
19 | 
20 | #从mfcc计算cmvn
21 | kaldi/src/featbin/compute-cmvn-stats --spk2utt=ark:data/test_yesno/spk2utt scp:test_yesno.scp ark,scp:cmvn_test_yesno.ark,cmvn_test_yesno.scp
22 | 
23 | #应用cmvn到mfcc feature(包含一次add deltas)
24 | kaldi/src/featbin/apply-cmvn --utt2spk=ark:data/test_yesno/split1/1/utt2spk scp:cmvn_test_yesno.scp scp:test_yesno.scp ark:- | kaldi/src/featbin/add-deltas ark:- ark:feat.ark
25 | ```
26 | 
27 | ## Todo
28 | 1. 支持其他解码方式和声学模型并进行优化，实现[vosk-api](https://github.com/alphacep/vosk-api)的完整功能
29 | 


--------------------------------------------------------------------------------
/data/1_0_0_0_0_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ma-Dan/asr-decode/e83e8ede576bbcddd84d6d3dc2204d2639086d31/data/1_0_0_0_0_0_0_0.wav


--------------------------------------------------------------------------------
/include/add-deltas.h:
--------------------------------------------------------------------------------
 1 | #ifndef ADD_DELTAS
 2 | #define ADD_DELTAS
 3 | 
 4 | #include "common.h"
 5 | 
 6 | struct DeltaFeaturesOptions {  // Settings consumed by DeltaFeatures / ComputeDeltas.
 7 |   int32 order;  // delta order; DeltaFeatures output holds (order+1) blocks per frame
 8 |   int32 window;  // e.g. 2; controls window size (window size is 2*window + 1)
 9 |   // The behavior at the edges is to replicate the first or last frame.
10 |   // This is not configurable.
11 |   // NOTE(review): the constructor below is not explicit, so a bare int converts implicitly.
12 |   DeltaFeaturesOptions(int32 order = 2, int32 window = 2):  // both fields default to 2
13 |       order(order), window(window) { }
14 | };
15 | 
16 | class DeltaFeatures {
17 |  public:
18 |   // This class provides a low-level function to compute delta features.
19 |   // Process() takes as input a matrix of features and the index of the frame
20 |   // to compute the deltas on.  It writes its output through a raw BaseFloat
21 |   // pointer with room for (original-feature-dimension) * (opts.order+1) values.
22 |   // This is not the most efficient way to do the computation, but it's
23 |   // state-free and thus easier to understand.
24 | 
25 |   explicit DeltaFeatures(const DeltaFeaturesOptions &opts);
26 | 
27 |   void Process(const P_Matrix input_feats,  // const pointer; the pointed-to Matrix is not const
28 |                int32 frame,                 // index of the frame to process
29 |                BaseFloat *output_frame) const;  // caller-provided output buffer
30 |  private:
31 |   DeltaFeaturesOptions opts_;
32 |   std::vector<vector<BaseFloat> > scales_;  // a scaling window for each
33 |   // of the orders, including zero: multiply the features for each
34 |   // dimension by this window.
35 | };
36 | 
37 | void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
38 |                    const P_Matrix input_features,
39 |                    P_Matrix output_features);
40 | 
41 | #endif


--------------------------------------------------------------------------------
/include/am-diag-gmm.h:
--------------------------------------------------------------------------------
 1 | #ifndef AM_DIAG_GMM
 2 | #define AM_DIAG_GMM
 3 | 
 4 | #include "common.h"
 5 | 
 6 | class DiagGmm  // Plain data holder for one entry of AmDiagGmm::densities.
 7 | {
 8 |     public:
 9 |         vector<BaseFloat> gconsts;  // presumably filled by AmDiagGmm::ComputeGconsts — confirm
10 |         bool valid_gconsts = false;  // starts false; previously indeterminate after default construction
11 |         vector<BaseFloat> weights;
12 |         Matrix inv_vars;
13 |         Matrix means_invvars;
14 | };
15 | 
16 | class AmDiagGmm {  // Collection of DiagGmm objects loaded from an open FILE stream.
17 |     public:
18 |         void Read(FILE *fp);  // reads from fp; no status is returned to the caller
19 |         ~AmDiagGmm();
20 |         DiagGmm& GetPdf(int32 pdf_index) const;  // returns a mutable reference even though the method is const
21 |         // NOTE(review): densities holds raw pointers and a destructor is declared but no copy control — copying looks unsafe; confirm.
22 |     private:
23 |         vector<DiagGmm*> densities;  // presumably indexed by pdf_index — confirm in am-diag-gmm.cpp
24 |         DiagGmm* ReadDiagGmm(FILE *fp);
25 |         int32 ComputeGconsts(DiagGmm* diaggmm);
26 |         int32 NumGauss(DiagGmm* diaggmm) const;
27 |         int32 Dim(DiagGmm* diaggmm) const;
28 | };
29 | 
30 | #endif


--------------------------------------------------------------------------------
/include/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_H
 2 | #define COMMON_H
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | #include <map>
 8 | #include <vector>
 9 | #include <limits>
10 | #include <algorithm>
11 | #include <math.h>
12 | 
13 | using namespace std;
14 | 
15 | using uint8 = unsigned char;  // short aliases for the built-in arithmetic types
16 | using int8 = signed char;
17 | using uint16 = unsigned short;
18 | using int16 = signed short;
19 | using int32 = int;
20 | using uint32 = unsigned int;
21 | using int64 = long long;
22 | using uint64 = unsigned long long;
23 | using BaseFloat = float;  // floating-point type used for features and model values
24 | 
25 | #define SAFE_FREE(x) if(x) {free(x); x=NULL;}  // free x and reset it to NULL; x is evaluated more than once
26 | // NOTE(review): SAFE_FREE expands to a bare `if`, so an `else` written after it would bind to the macro.
27 | #ifndef M_2PI
28 | #define M_2PI 6.283185307179586476925286766559005  // 2 * pi
29 | #endif
30 | 
31 | #ifndef M_LOG_2PI
32 | #define M_LOG_2PI 1.8378770664093454835606594728112  // natural log of 2 * pi
33 | #endif
34 | 
35 | #ifndef FLT_EPSILON
36 | #define FLT_EPSILON 1.19209290e-7f  // fallback used only when FLT_EPSILON is not already defined
37 | #endif
38 | 
39 | typedef struct tagMatrix  // BaseFloat matrix stored in a single flat vector.
40 | {
41 |     int32 cols;  // not initialized here; callers must set cols/rows/stride
42 |     int32 rows;
43 |     int32 stride;  // presumably the element step between consecutive rows of data — confirm in common.cpp
44 |     vector<BaseFloat> data;
45 | } Matrix, *P_Matrix;  // P_Matrix is a raw Matrix*; `const P_Matrix` makes the pointer const, not the Matrix
46 | 
47 | typedef struct tagMatrixDouble  // Same fields as tagMatrix, with double elements.
48 | {
49 |     int32 cols;  // not initialized here; callers must set cols/rows/stride
50 |     int32 rows;
51 |     int32 stride;  // presumably the element step between consecutive rows of data — confirm
52 |     vector<double> data;
53 | } MatrixDouble, *P_MatrixDouble;  // P_MatrixDouble is a raw MatrixDouble*
54 | 
55 | void ReadToken(FILE *fp, char* s);
56 | void ReadIntegerVector(FILE *fp, vector<int32> *v);
57 | void ReadBasicType(FILE *fp, int32 *t);
58 | void ReadBasicType(FILE *fp, BaseFloat *t);
59 | void ReadFloatVectors(FILE *fp, vector<BaseFloat> *v);
60 | void ReadFloatMatrix(FILE *fp, P_Matrix m);
61 | BaseFloat ReadMatrix(P_Matrix m, int32 row, int32 col);
62 | 
63 | #endif


--------------------------------------------------------------------------------
/include/compressed-matrix.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMPRESSED_MATRIX
 2 | #define COMPRESSED_MATRIX
 3 | 
 4 | #include "common.h"
 5 | 
 6 | struct GlobalHeader {  // Leading header of a CompressedMatrix data buffer.
 7 |     int32 format;     // Represents the enum DataFormat.
 8 |     float min_value;  // min_value and range represent the ranges of the integer
 9 |                       // data in the kTwoByte and kOneByte formats, and the
10 |                       // range of the PerColHeader uint16's in the
11 |                       // kOneByteWithColHeaders format.
12 |     float range;
13 |     int32 num_rows;
14 |     int32 num_cols;
15 | };
16 | 
17 | struct PerColHeader {  // Per-column uint16 values; their range is described by GlobalHeader.
18 |     uint16 percentile_0;
19 |     uint16 percentile_25;
20 |     uint16 percentile_75;
21 |     uint16 percentile_100;
22 | };
23 | 
24 | class CompressedMatrix {  // Holds a matrix in an opaque buffer (data_) built from / expanded to a P_Matrix.
25 |     public:
26 |         CompressedMatrix(): data_(NULL) { }  // starts with no buffer
27 |         ~CompressedMatrix() { Clear(); }     // releases whatever Clear() releases
28 |         // NOTE(review): no copy constructor/assignment is declared, so a copy shares data_ — confirm copies never happen.
29 |         void Clear();
30 | 
31 |         void CopyFromMat(const P_Matrix mat);
32 |         void ComputeGlobalHeader(const P_Matrix mat, GlobalHeader *header);
33 |         static int32 DataSize(const GlobalHeader &header);
34 |         static void* AllocateData(int32 num_bytes);
35 |         void CopyToMat(P_Matrix mat) const;
36 | 
37 |     private:
38 |         void GetMinMax(const P_Matrix mat, BaseFloat* pMin, BaseFloat* pMax);
39 |         static void CompressColumn(const GlobalHeader &global_header,
40 |                              const BaseFloat *data, int32 stride,
41 |                              int32 num_rows, PerColHeader *header,
42 |                              uint8 *byte_data);
43 |         static void ComputeColHeader(const GlobalHeader &global_header,
44 |                                const BaseFloat *data, int32 stride,
45 |                                int32 num_rows, PerColHeader *header);
46 |         // Conversions between float values and their uint16 encoding, relative to the global header.
47 |         static inline uint16 FloatToUint16(const GlobalHeader &global_header,
48 |                                      float value);
49 | 
50 |         static inline float Uint16ToFloat(const GlobalHeader &global_header,
51 |                                     uint16 value);
52 | 
53 |         // This is used only in the kOneByteWithColHeaders compression format.
54 |         static inline uint8 FloatToChar(float p0, float p25,
55 |                                           float p75, float p100,
56 |                                           float value);
57 | 
58 |         // This is used only in the kOneByteWithColHeaders compression format.
59 |         static inline float CharToFloat(float p0, float p25,
60 |                                   float p75, float p100,
61 |                                   uint8 value);
62 | 
63 |         void *data_;  // NULL when empty; presumably begins with a GlobalHeader — confirm in compressed-matrix.cpp
64 | };
65 | 
66 | #endif


--------------------------------------------------------------------------------
/include/compute-cmvn-stats.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMPUTE_CMVN_STATS
 2 | #define COMPUTE_CMVN_STATS
 3 | 
 4 | #include "common.h"
 5 | 
 6 | void InitCmvnStats(int32 dim, P_MatrixDouble stats);
 7 | void AccCmvnStats(const P_Matrix feats, P_MatrixDouble stats);
 8 | void ApplyCmvn(const P_MatrixDouble stats, bool var_norm, P_Matrix feats);
 9 | 
10 | #endif


--------------------------------------------------------------------------------
/include/feature-mfcc.h:
--------------------------------------------------------------------------------
  1 | #ifndef FEATURE_MFCC
  2 | #define FEATURE_MFCC
  3 | 
  4 | #include "common.h"
  5 | #include "srfft.h"
  6 | 
  7 | int32 RoundUpToNearestPowerOfTwo(int32 n);
  8 | 
  9 | enum WindowsType  // Window selector stored in FrameExtractionOptions::window_type.
 10 | {
 11 |     hanning = 0,  // numbering starts at 0 and continues in declaration order
 12 |     sine,
 13 |     hamming,
 14 |     povey,  // the default chosen by FrameExtractionOptions
 15 |     rectangular,
 16 |     blackman
 17 | };
 18 | 
 19 | struct FrameExtractionOptions
 20 | {
 21 |     BaseFloat samp_freq;        // sampling frequency of the input
 22 |     BaseFloat frame_shift_ms;   // frame shift, in milliseconds
 23 |     BaseFloat frame_length_ms;  // frame length, in milliseconds
 24 |     BaseFloat dither;           // dithering amount; 0.0 means no dither
 25 |     BaseFloat preemph_coeff;    // pre-emphasis coefficient
 26 |     bool remove_dc_offset;      // subtract the mean of the wave before the FFT
 27 |     vector<BaseFloat> window;
 28 |     WindowsType window_type;
 29 |     BaseFloat blackman_coeff;
 30 | 
 31 |     FrameExtractionOptions()
 32 |         : samp_freq(8000), frame_shift_ms(10.0), frame_length_ms(25.0),
 33 |           dither(1.0), preemph_coeff(0.97), remove_dc_offset(true),
 34 |           window_type(povey), blackman_coeff(0.42) {}
 35 | 
 36 |     // samp_freq * 0.001 * frame_shift_ms, truncated to int32.
 37 |     int32 WindowShift() const {
 38 |         const double rate_per_ms = samp_freq * 0.001;
 39 |         return static_cast<int32>(rate_per_ms * frame_shift_ms);
 40 |     }
 41 |     // samp_freq * 0.001 * frame_length_ms, truncated to int32.
 42 |     int32 WindowSize() const {
 43 |         const double rate_per_ms = samp_freq * 0.001;
 44 |         return static_cast<int32>(rate_per_ms * frame_length_ms);
 45 |     }
 46 |     // WindowSize() passed through RoundUpToNearestPowerOfTwo().
 47 |     int32 PaddedWindowSize() const {
 48 |         return RoundUpToNearestPowerOfTwo(WindowSize());
 49 |     }
 50 | };
 51 | 
 52 | struct MelBanksOptions {  // Settings passed to the MelBanks constructor.
 53 |   int32 num_bins;  // e.g. 25; number of triangular bins
 54 |   BaseFloat low_freq;  // e.g. 20; lower frequency cutoff
 55 |   BaseFloat high_freq;  // an upper frequency cutoff; 0 -> no cutoff, negative
 56 |   // ->added to the Nyquist frequency to get the cutoff.
 57 |   BaseFloat vtln_low;  // vtln lower cutoff of warping function.
 58 |   BaseFloat vtln_high;  // vtln upper cutoff of warping function: if negative, added
 59 |                         // to the Nyquist frequency to get the cutoff.
 60 |   // Defaults: low_freq 20, high_freq 0, vtln_low 100, vtln_high -500; num_bins defaults to 25.
 61 |   explicit MelBanksOptions(int num_bins = 25)
 62 |       : num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100),
 63 |         vtln_high(-500) {}
 64 | };
 65 | 
 66 | class MelBanks {  // Mel filter bank: per-bin weights over FFT bins plus each bin's center frequency.
 67 |  public:
 68 |   // Mel value -> frequency; algebraic inverse of MelScale().
 69 |   static inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
 70 |     return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
 71 |   }
 72 |   // Frequency -> mel value: 1127 * ln(1 + freq / 700).
 73 |   static inline BaseFloat MelScale(BaseFloat freq) {
 74 |     return 1127.0f * logf (1.0f + freq / 700.0f);
 75 |   }
 76 | 
 77 |   static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff,
 78 |                                 BaseFloat vtln_high_cutoff,  // discontinuities in warp func
 79 |                                 BaseFloat low_freq,
 80 |                                 BaseFloat high_freq,  // upper+lower frequency cutoffs in
 81 |                                 // the mel computation
 82 |                                 BaseFloat vtln_warp_factor,
 83 |                                 BaseFloat freq);
 84 |   // Same parameters as VtlnWarpFreq, but the last argument is named mel_freq rather than freq.
 85 |   static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,
 86 |                                    BaseFloat vtln_high_cutoff,
 87 |                                    BaseFloat low_freq,
 88 |                                    BaseFloat high_freq,
 89 |                                    BaseFloat vtln_warp_factor,
 90 |                                    BaseFloat mel_freq);
 91 | 
 92 | 
 93 |   MelBanks(const MelBanksOptions &opts,
 94 |            const FrameExtractionOptions &frame_opts,
 95 |            BaseFloat vtln_warp_factor);
 96 | 
 97 |   /// Compute Mel energies (note: not log energies).
 98 |   /// At input, "fft_energies" contains the FFT energies (not log).
 99 |   void Compute(const vector<BaseFloat> &fft_energies,
100 |                vector<BaseFloat> &mel_energies_out) const;
101 | 
102 |   int32 NumBins() const { return bins_.size(); }  // size_t is implicitly narrowed to int32
103 | 
104 |   // Returns vector of central freq of each bin; needed by plp code.
105 |   const vector<BaseFloat> &GetCenterFreqs() const { return center_freqs_; }
106 |   // Read-only access to the (first fft-bin, weights) pairs described below.
107 |   const std::vector<std::pair<int32, vector<BaseFloat> > >& GetBins() const {
108 |     return bins_;
109 |   }
110 | 
111 |  private:
112 | 
113 |   // center frequencies of bins, numbered from 0 ... num_bins-1.
114 |   // Needed by GetCenterFreqs().
115 |   vector<BaseFloat> center_freqs_;
116 | 
117 |   // the "bins_" vector is a vector, one for each bin, of a pair:
118 |   // (the first nonzero fft-bin), (the vector of weights).
119 |   std::vector<std::pair<int32, vector<BaseFloat> > > bins_;
120 | };
121 | 
122 | struct MfccOptions  // MFCC settings; defaults are 23 mel bins, 13 cepstra and a lifter of 22.0.
123 | {
124 |     MelBanksOptions mel_opts;
125 |     int num_ceps;   // e.g. 13: num cepstral coeffs, counting zero.
126 |     BaseFloat cepstral_lifter;  // Scaling factor on cepstra for HTK compatibility.
127 |                               // if 0.0, no liftering is done.
128 |     // mel_opts(23) overrides MelBanksOptions' own default of 25 bins.
129 |     MfccOptions()
130 |     : mel_opts(23),
131 |       num_ceps(13),
132 |       cepstral_lifter(22.0)
133 |     {};
134 | };
135 | 
136 | struct RandomState {  // Carries a single unsigned seed value.
137 |     RandomState();  // defined out of line; presumably sets seed — confirm in feature-mfcc.cpp
138 |     unsigned seed;
139 | };
140 | 
141 | class MfccComputer  // Turns a waveform (vector of BaseFloat samples) into a feature Matrix.
142 | {
143 |     public:
144 |         MfccComputer();
145 |         ~MfccComputer();  // NOTE(review): raw-pointer members and no copy control — copying looks unsafe; confirm
146 |         void ComputeFeatures(const vector<BaseFloat> &wave, BaseFloat sample_freq, BaseFloat vtln_warp, P_Matrix output);
147 | 
148 |     private:
149 |         int32 NumFrames(int64 num_samples);
150 |         void ExtractWindow(const vector<BaseFloat> &wave, int32 f, BaseFloat vtln_warp, vector<BaseFloat> &window, BaseFloat* output);
151 |         void ProcessWindow(vector<BaseFloat> window, BaseFloat vtln_warp, BaseFloat* output);  // window is taken by value (copied per call)
152 |         const MelBanks *GetMelBanks(BaseFloat vtln_warp);  // presumably looks up or creates the mel_banks_ entry — confirm
153 | 
154 |         MfccOptions mfccOptions;
155 |         FrameExtractionOptions frameOptions;
156 |         std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.
157 |         SplitRadixRealFft *srfft;  // raw pointer; type declared in srfft.h
158 | 
159 |         vector<BaseFloat> mel_energies_;
160 |         Matrix dct_matrix;
161 |         vector<BaseFloat> lifter_coeffs_;
162 | };
163 | 
164 | #endif


--------------------------------------------------------------------------------
/include/fstreader.h:
--------------------------------------------------------------------------------
 1 | #ifndef FST_READER
 2 | #define FST_READER
 3 | 
 4 | #include "common.h"
 5 | 
 6 | class FstHeader {  // Header fields of an FST file; every field now has a defined value before Read().
 7 |     public:
 8 |         bool Read(const char* fileName);  // presumably fills the fields below from fileName — confirm in fstreader.cpp
 9 |         ~FstHeader();
10 |         // Members are default-initialized so an unread or partially read header never exposes indeterminate values.
11 |         char* fsttype = NULL;      // E.g. "vector".
12 |         char* arctype = NULL;      // E.g. "standard".
13 |         int32 version = 0;         // Type version number.
14 |         int32 flags = 0;           // File format bits.
15 |         uint64 properties = 0;     // FST property bits.
16 |         int64 start = 0;           // Start state.
17 |         int64 numstates = 0;       // # of states.
18 |         int64 numarcs = 0;         // # of arcs.
19 |     private:
20 |         void ReadString(char **buf, FILE *fp);
21 |         void ReadInt(void *buf, int bytes, FILE *fp);
22 | };
23 | 
24 | typedef struct tagArc  // One FST arc record.
25 | {
26 |     int ilabel;        // input label
27 |     int olabel;        // output label
28 |     BaseFloat weight;
29 |     int nextstate;     // presumably an index into the state array — confirm in fstreader.cpp
30 | } Arc, *P_Arc;
31 | 
32 | typedef struct tagState  // One FST state record.
33 | {
34 |     BaseFloat weight;
35 |     int field1;  // meaning not visible in this header — see fstreader.cpp
36 |     int arcNum;  // presumably the number of arcs reachable through `arc` — confirm
37 |     int field3;  // meaning not visible in this header
38 |     int field4;  // meaning not visible in this header
39 |     P_Arc arc;   // raw pointer to arc data; ownership is not visible here
40 | } State, *P_State;
41 | 
42 | class FstReader {  // Holds an FST: its header plus pointers to state and arc storage.
43 |     public:
44 |         bool Read(const char* fileName);  // NOTE(review): the bool presumably signals success -- confirm
45 |         int Start();  // presumably the start state id (cf. hdr.start) -- confirm
46 |         ~FstReader();
47 |     //private:  (the access specifier is commented out, so the members below are public)
48 |         FstHeader hdr;
49 |         P_State state;  // raw pointer; NOTE(review): allocation and release happen in fstreader.cpp
50 |         P_Arc arc;      // raw pointer; same note as for `state`
51 | };
52 | 
53 | #endif


--------------------------------------------------------------------------------
/include/simple-decoder.h:
--------------------------------------------------------------------------------
 1 | #ifndef SIMPLE_DECODER
 2 | #define SIMPLE_DECODER
 3 | 
 4 | #include "common.h"
 5 | #include "transition-model.h"
 6 | #include "am-diag-gmm.h"
 7 | #include "fstreader.h"
 8 | 
 9 | typedef int StateId;
10 | 
11 | typedef struct tagDecodeArc  // Arc as stored inside a Token; has two weights where Arc has one.
12 | {
13 |     int ilabel;         // input label
14 |     int olabel;         // output label
15 |     BaseFloat weight1;  // NOTE(review): roles of weight1/weight2 are not visible here (possibly graph vs. acoustic cost)
16 |     BaseFloat weight2;
17 |     int nextstate;      // state the arc leads to
18 | } DecodeArc, *P_DecodeArc;
19 | 
20 | typedef struct Token  // One search hypothesis kept by the decoder.
21 | {
22 |     DecodeArc arc;    // arc stored with this token; presumably the arc taken to reach it
23 |     Token *prev;      // link to another token; presumably the predecessor on the path
24 |     int32 ref_count;  // reference counter; NOTE(review): maintained in simple-decoder.cpp
25 |     double cost;      // cost associated with the token; presumably accumulated along the path
26 | } *P_Token;
27 | 
28 | class SimpleDecoder  // Decoder that keeps one Token per FST state in two maps (current and previous).
29 | {
30 |     public:
31 |         SimpleDecoder(TransitionModel *transmodel, AmDiagGmm *amgmm, FstReader *fst, BaseFloat beam);  // pointers are stored as raw members; the pointees must outlive the decoder
32 |         bool Decode(P_Matrix feature, BaseFloat acoustic_scale);  // NOTE(review): presumably InitDecoding() + AdvanceDecoding() -- confirm
33 |         vector<int> GetBestPath();  // NOTE(review): presumably the labels along the best path -- confirm which labels
34 | 
35 |         void InitDecoding();
36 |         void AdvanceDecoding(P_Matrix feature, BaseFloat acoustic_scale);
37 | 
38 |     private:
39 |         class TransitionModel *m_transmodel;  // not owned (assumed; no ownership transfer is visible in this header)
40 |         class AmDiagGmm *m_amgmm;             // not owned (assumed)
41 |         map<StateId, Token*> cur_toks;        // tokens keyed by FST state id
42 |         map<StateId, Token*> prev_toks;       // presumably the tokens of the previous frame
43 |         class FstReader *m_fst;               // not owned (assumed)
44 |         BaseFloat m_beam;                     // beam passed to the constructor; presumably used by PruneToks()
45 |         int32 num_frames_decoded;
46 | 
47 |         void ProcessEmitting(P_Matrix feature, BaseFloat acoustic_scale);
48 |         void ProcessNonemitting();
49 | 
50 |         BaseFloat LogLikelihood(P_Matrix feature, int32 frame, int32 tid);  // tid: presumably a transition-id -- confirm
51 | 
52 |         static void ClearToks(map<StateId, Token*> &toks);
53 |         static void PruneToks(BaseFloat beam, map<StateId, Token*> *toks);
54 | };
55 | 
56 | #endif


--------------------------------------------------------------------------------
/include/srfft.h:
--------------------------------------------------------------------------------
 1 | #ifndef SRFFT_H
 2 | #define SRFFT_H
 3 | 
 4 | #include "common.h"
 5 | 
 6 | class SplitRadixComplexFft  // Complex FFT of fixed size N, using lookup tables (brseed_, tab_).
 7 | {
 8 |     public:
 9 |         SplitRadixComplexFft(int32 N);  // NOTE(review): presumably N must be a power of two -- confirm in srfft.cpp
10 |         ~SplitRadixComplexFft();
11 | 
12 |   // Does the FFT computation, given pointers to the real and
13 |   // imaginary parts.  If "forward", do the forward FFT; else
14 |   // do the inverse FFT (without the 1/N factor).
15 |   // xr and xi are pointers to zero-based arrays of size N,
16 |   // containing the real and imaginary parts
17 |   // respectively.
18 |   void Compute(BaseFloat *xr, BaseFloat *xi, bool forward) const;
19 | 
20 |   // This version of Compute takes a single array of size N*2,
21 |   // containing [ r0 im0 r1 im1 ... ].  Otherwise its behavior is the
22 |   // same as the version above.  It is not const: per the comment on the
23 |   void Compute(BaseFloat *x, bool forward);
24 | 
25 | 
26 |   // This version of Compute is const; it operates on an array of size N*2
27 |   // containing [ r0 im0 r1 im1 ... ], but it uses the argument "temp_buffer" as
28 |   // temporary storage instead of a class-member variable.  It will allocate it if
29 |   // needed.
30 |   void Compute(BaseFloat *x, bool forward, std::vector<BaseFloat> *temp_buffer) const;
31 | 
32 |     private:
33 |         void ComputeTables();
34 |         void ComputeRecursive(BaseFloat *xr, BaseFloat *xi, int32 logn) const;
35 |         void BitReversePermute(BaseFloat *x, int32 logn) const;
36 | 
37 |         int32 N_;
38 |         int32 logn_;  // log(N)
39 | 
40 |         int32 *brseed_;
41 |         // brseed is Evans' seed table, ref:  (Ref: D. M. W.
42 |         // Evans, "An improved digit-reversal permutation algorithm ...",
43 |         // IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125).
44 |         BaseFloat **tab_;       // Tables of butterfly coefficients.
45 |     protected:
46 |         std::vector<BaseFloat> temp_buffer_;  // scratch storage; per the comment above, the non-const Compute() uses a class member for this
47 | };
48 | 
49 | class SplitRadixRealFft: private SplitRadixComplexFft {  // Real FFT of size N built on a complex FFT of size N/2.
50 |  public:
51 |   SplitRadixRealFft(int32 N):  // will fail unless N>=4 and N is a power of 2.
52 |       SplitRadixComplexFft (N/2), N_(N) { }
53 | 
54 |   /// If forward == true, this function transforms from a sequence of N real points to its complex fourier
55 |   /// transform; otherwise it goes in the reverse direction.  If you call it
56 |   /// in the forward and then reverse direction and multiply by 1.0/N, you
57 |   /// will get back the original data.
58 |   /// The interpretation of the complex-FFT data is as follows: the array
59 |   /// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
60 |   /// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
61 |   void Compute(BaseFloat *x, bool forward);
62 | 
63 | 
64 |   /// This is as the other Compute() function, but it is a const version that
65 |   /// uses a user-supplied buffer.
66 |   void Compute(BaseFloat *x, bool forward, std::vector<BaseFloat> *temp_buffer) const;
67 | 
68 |  private:
69 |   int N_;  // the real-FFT size N; distinct from the private N_ of the base class, which is constructed with N/2
70 | };
71 | 
72 | #endif


--------------------------------------------------------------------------------
/include/transition-model.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRANSITION_MODEL
 2 | #define TRANSITION_MODEL
 3 | 
 4 | #include "common.h"
 5 | 
 6 | typedef struct tagHmmState  // One state of an HMM topology entry.
 7 | {
 8 |     int32 forward_pdf_class;    // pdf class associated with leaving the state (going by the name -- confirm in ReadTopo())
 9 |     int32 self_loop_pdf_class;  // pdf class associated with the self-loop (going by the name -- confirm)
10 |     vector<pair<int32, BaseFloat> > transitions;  // presumably (destination state, probability) pairs -- confirm in ReadTopo()
11 | } HmmState, *P_HmmState;
12 | 
13 | typedef vector<HmmState> TopologyEntry;
14 | 
15 | typedef struct tagHmmTopology  // The set of HMM topologies used by the phones.
16 | {
17 |     vector<int32> phones;           // phone ids; presumably the phones that have a topology -- confirm
18 |     vector<int32> phone2idx;        // presumably maps a phone id to an index into `entries` -- confirm
19 |     vector<TopologyEntry> entries;  // each entry is a vector of HmmState
20 | } HmmTopology, *P_HmmTopology;
21 | 
22 | typedef struct tagTuple  // Record stored in TransitionModel::tuples; presumably one per transition-state -- confirm
23 | {
24 |     int32 phone;          // phone id
25 |     int32 hmm_state;      // HMM state; presumably an index within the phone's topology
26 |     int32 forward_pdf;    // pdf id; presumably for the forward transitions
27 |     int32 self_loop_pdf;  // pdf id; presumably for the self-loop
28 | } Tuple, *P_Tuple;
29 | 
30 | class TransitionModel  // Holds an HMM topology plus tables indexed by transition-id / transition-state.
31 | {
32 |     public:
33 |         void Read(FILE *fp);  // reads the model from an already opened file; the caller keeps ownership of fp
34 |         int32 TransitionIdToPdf(int32 trans_id) const;  // presumably a lookup in id2pdf_id -- confirm
35 | 
36 |     private:
37 |         HmmTopology topo;
38 |         vector<Tuple> tuples;
39 |         vector<int32> state2id;   // NOTE(review): the exact indexing of these tables is defined in transition-model.cpp
40 |         vector<int32> id2state;
41 |         vector<int32> id2pdf_id;
42 |         vector<BaseFloat> log_probs;
43 |         vector<BaseFloat> non_self_loop_log_probs;
44 |         int32 num_pdfs;
45 | 
46 |         void ReadTopo(FILE *fp);
47 | 
48 |         void ComputeDerived();  // presumably fills the derived tables above after reading -- confirm
49 |         bool IsSelfLoop(int32 trans_id) const;
50 |         const TopologyEntry& TopologyForPhone(int32 phone) const;
51 |         void ComputeDerivedOfProbs();
52 |         int32 SelfLoopOf(int32 trans_state) const;
53 |         BaseFloat GetTransitionLogProb(int32 trans_id) const;
54 |         int32 PairToTransitionId(int32 trans_state, int32 trans_index) const;
55 |         int32 NumTransitionStates();  // NOTE(review): the only query in this class that is not declared const
56 | };
57 | 
58 | #endif


--------------------------------------------------------------------------------
/include/wavereader.h:
--------------------------------------------------------------------------------
 1 | #ifndef WAVE_READER
 2 | #define WAVE_READER
 3 | 
 4 | #include "common.h"
 5 | 
 6 | typedef struct tagWaveHeader  // Fields of a canonical 44-byte RIFF/WAVE (PCM) file header, in file order.
 7 | {
 8 |     uint8  chunk_id[4];      // "RIFF"
 9 |     uint32 chunk_size;
10 |     uint8  format[4];        // "WAVE"
11 |     uint8  subchunk1_id[4];  // "fmt " (lower case, with a trailing space)
12 |     uint32 subchunk1_size;   // PCM = 16
13 |     uint16 audio_format;     // PCM = 1
14 |     uint16 channels;
15 |     uint32 sample_rate;
16 |     uint32 byte_rate;
17 |     uint16 block_align;      // NumChannels * BitsPerSample / 8
18 |     uint16 bit_per_sample;
19 |     uint8  subchunk2_id[4];  // "data" (lower case)
20 |     uint32 subchunk2_size;   // NOTE(review): files with extra chunks before "data" do not match this fixed layout
21 | } WaveHeader, *P_WaveHeader;
22 | 
23 | typedef struct tagWaveFile  // A wave file: its header plus samples.
24 | {
25 |     WaveHeader header;
26 |     vector<int16> data;  // samples as 16-bit integers; NOTE(review): channel handling is decided in wavereader.cpp
27 | } WaveFile, *P_WaveFile;
28 | 
29 | class WaveReader  // Loads a wave file; results are exposed through the public members below.
30 | {
31 |     public:
32 |         WaveReader();
33 |         ~WaveReader();
34 |         void ReadWaveFile(const char* fileName);  // returns nothing, so a failure cannot be reported to the caller
35 | 
36 |         WaveFile m_wavefile;           // header and samples; decode.cpp reads header.sample_rate from here
37 |         vector<BaseFloat> m_waveData;  // floating-point samples; decode.cpp passes this to the MFCC computation
38 | 
39 |     private:
40 | };
41 | 
42 | #endif


--------------------------------------------------------------------------------
/model/HCLG.fst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ma-Dan/asr-decode/e83e8ede576bbcddd84d6d3dc2204d2639086d31/model/HCLG.fst


--------------------------------------------------------------------------------
/model/final.mdl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ma-Dan/asr-decode/e83e8ede576bbcddd84d6d3dc2204d2639086d31/model/final.mdl


--------------------------------------------------------------------------------
/src/add-deltas.cpp:
--------------------------------------------------------------------------------
 1 | #include "add-deltas.h"
 2 | 
 3 | DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
 4 |   // scales_[n] holds the filter taps that produce the n-th order delta.
 5 |   scales_.resize(opts.order+1);
 6 |   scales_[0].resize(1);
 7 |   scales_[0][0] = 1.0;  // order 0: a single tap of 1, i.e. the input features
 8 | 
 9 |   for (int32 order = 1; order <= opts.order; order++) {
10 |     const vector<BaseFloat> &lower = scales_[order-1];
11 |     vector<BaseFloat> &taps = scales_[order];
12 |     // The delta window spans 2*half_width + 1 frames.  Each order's filter
13 |     // is the previous order's filter combined with the ramp
14 |     // [-half_width .. half_width], so it grows by 2*half_width taps.
15 |     const int32 half_width = opts.window;
16 |     const int32 lower_center = (static_cast<int32>(lower.size()-1))/2;
17 |     const int32 center = lower_center + half_width;
18 |     taps.resize(lower.size() + 2*half_width);  // new elements start at zero
19 | 
20 |     BaseFloat normalizer = 0.0;
21 |     for (int32 j = -half_width; j <= half_width; j++) {
22 |       normalizer += j*j;
23 |       for (int32 k = -lower_center; k <= lower_center; k++) {
24 |         taps[j+k+center] +=
25 |             static_cast<BaseFloat>(j) * lower[k+lower_center];
26 |       }
27 |     }
28 |     // Scale every tap by 1 / sum(j^2).
29 |     const double inv_norm = 1.0 / normalizer;
30 |     for (size_t t = 0; t < taps.size(); t++) taps[t] *= inv_norm;
31 |   }
32 | }
33 | 
34 | void DeltaFeatures::Process(const P_Matrix input_feats,
35 |                             int32 frame,
36 |                             BaseFloat *output_frame) const {
37 |   // Fills output_frame with (order+1)*feat_dim values: for each delta
38 |   // order, the filter scales_[order] applied to the rows around `frame`.
39 |   const int32 num_frames = input_feats->rows;
40 |   const int32 feat_dim = input_feats->cols;
41 |   const int32 out_dim = (opts_.order+1)*feat_dim;
42 |   for (int32 n = 0; n < out_dim; n++) {
43 |     output_frame[n] = 0.0f;
44 |   }
45 | 
46 |   for (int32 order = 0; order <= opts_.order; order++) {
47 |     const vector<BaseFloat> &taps = scales_[order];
48 |     const int32 half = (taps.size() - 1) / 2;
49 |     BaseFloat *dst = output_frame + order*feat_dim;
50 |     for (int32 j = -half; j <= half; j++) {
51 |       const BaseFloat tap = taps[j + half];
52 |       if (tap == 0.0) continue;  // this tap contributes nothing
53 |       // A row index that falls outside [0, num_frames) is clamped to the
54 |       // nearest valid row.
55 |       int32 src_row = frame + j;
56 |       if (src_row < 0) src_row = 0;
57 |       else if (src_row >= num_frames) src_row = num_frames - 1;
58 |       for (int32 k = 0; k < feat_dim; k++) {
59 |         dst[k] += tap * input_feats->data[src_row*input_feats->cols+k];
60 |       }
61 |     }
62 |   }
63 | }
64 | 
65 | void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
66 |                    const P_Matrix input_features,
67 |                    P_Matrix output_features) {
68 |   // The output keeps the row count and has (order+1) times as many columns;
69 |   // row r of the output is DeltaFeatures::Process() applied to frame r.
70 |   output_features->rows = input_features->rows;
71 |   output_features->cols = input_features->cols*(delta_opts.order + 1);
72 |   output_features->data.resize(output_features->rows * output_features->cols);
73 |   DeltaFeatures delta(delta_opts);
74 |   for (int32 r = 0; r < static_cast<int32>(input_features->rows); r++)
75 |     delta.Process(input_features, r, output_features->data.data() + r*output_features->cols);
76 | }
77 | 


--------------------------------------------------------------------------------
/src/am-diag-gmm.cpp:
--------------------------------------------------------------------------------
  1 | #include "am-diag-gmm.h"
  2 | 
  3 | void AmDiagGmm::Read(FILE *fp)
  4 | {
  5 |     // Zero-initialised: ReadBasicType() returns without writing its output
  6 |     // when the size byte is wrong, which used to leave these indeterminate.
  7 |     int32 num_pdfs = 0, dim = 0;
  8 |     char token[128];
  9 | 
 10 |     ReadToken(fp, token); //<DIMENSION>
 11 |     ReadBasicType(fp, &dim);
 12 |     ReadToken(fp, token); //<NUMPDFS>
 13 |     ReadBasicType(fp, &num_pdfs);
 14 | 
 15 |     if (num_pdfs > 0) densities.reserve(num_pdfs);  // a negative count must not reach reserve()
 16 |     for (int32 i = 0; i < num_pdfs; i++)
 17 |         densities.push_back(ReadDiagGmm(fp));  // one heap-allocated DiagGmm per pdf
 18 | }
 19 | 
 20 | AmDiagGmm::~AmDiagGmm()
 21 | {   // Release the DiagGmm objects that ReadDiagGmm() allocated with new.
 22 |     for (size_t i = 0; i < densities.size(); i++) delete densities[i];
 23 | }
 24 | DiagGmm& AmDiagGmm::GetPdf(int32 pdf_index) const
 25 | {   // Returns the mixture stored at pdf_index; the index is not range-checked.
 26 |   return *densities[pdf_index];
 27 | }
 28 | 
 29 | DiagGmm* AmDiagGmm::ReadDiagGmm(FILE *fp)
 30 | {
 31 |     // Parses one mixture from fp; the returned object is allocated with new.
 32 |     DiagGmm *diag_gmm = new DiagGmm();
 33 |     char token[128];
 34 | 
 35 |     ReadToken(fp, token); //<DiagGMMBegin> or <DiagGMM>
 36 | 
 37 |     // <GCONSTS> is optional.  When it is absent the token just read is
 38 |     // already <WEIGHTS>, so a further token is consumed only after the
 39 |     // gconsts vector itself has been read.
 40 |     ReadToken(fp, token);
 41 |     if(0 == strcmp(token, "<GCONSTS>"))
 42 |     {
 43 |         ReadFloatVectors(fp, &diag_gmm->gconsts);
 44 |         ReadToken(fp, token);
 45 |     }
 46 |     if(0 == strcmp(token, "<WEIGHTS>"))
 47 |         ReadFloatVectors(fp, &diag_gmm->weights);
 48 | 
 49 |     ReadToken(fp, token); //<MEANS_INVVARS>
 50 |     ReadFloatMatrix(fp, &diag_gmm->means_invvars);
 51 | 
 52 |     ReadToken(fp, token); //<INV_VARS>
 53 |     ReadFloatMatrix(fp, &diag_gmm->inv_vars);
 54 | 
 55 |     ReadToken(fp, token); //</DiagGMM>
 56 | 
 57 |     ComputeGconsts(diag_gmm);  // gconsts are recomputed from weights, means and variances
 58 |     return diag_gmm;
 59 | }
 60 | 
 61 | int32 AmDiagGmm::ComputeGconsts(DiagGmm* diaggmm) {
 62 |     // Recomputes diaggmm->gconsts from the weights, means_invvars and
 63 |     // inv_vars; returns how many components ended up with an infinite value.
 64 |     const int32 num_mix = NumGauss(diaggmm);
 65 |     const int32 dim = Dim(diaggmm);
 66 |     const BaseFloat offset = -0.5 * M_LOG_2PI * dim;  // same for every component
 67 |     int32 num_bad = 0;
 68 | 
 69 |     // Keep exactly one gconst slot per component.
 70 |     if (static_cast<int32>(diaggmm->gconsts.size()) != num_mix)
 71 |     {
 72 |         diaggmm->gconsts.resize(num_mix);
 73 |     }
 74 | 
 75 |     for (int32 mix = 0; mix < num_mix; mix++)
 76 |     {
 77 |         // logf() of a zero weight is -inf, which is tolerated below.
 78 |         BaseFloat gc = logf(diaggmm->weights[mix]) + offset;
 79 |         for (int32 d = 0; d < dim; d++)
 80 |         {
 81 |             // means_invvars holds mean times inverse variance, so
 82 |             // mean_invvar^2 / inv_var is mean squared times inverse variance.
 83 |             const BaseFloat inv_var = ReadMatrix(&diaggmm->inv_vars, mix, d);
 84 |             const BaseFloat mean_invvar = ReadMatrix(&diaggmm->means_invvars, mix, d);
 85 |             gc += 0.5 * logf(inv_var) - 0.5 * mean_invvar * mean_invvar / inv_var;
 86 |         }
 87 |         // NaN is reported (and still stored); negative infinity is accepted.
 88 |         if (isnan(gc))
 89 |         {
 90 |            printf("At component %d  not a number in gconst computation", mix);
 91 |         }
 92 |         if (isinf(gc))
 93 |         {
 94 |             num_bad++;
 95 |             // Turn +inf into -inf so the final answer is -inf, not NaN.
 96 |             if (gc > 0)
 97 |             {
 98 |                 gc = -gc;
 99 |             }
100 |         }
101 |         diaggmm->gconsts[mix] = gc;
102 |     }
103 | 
104 |     diaggmm->valid_gconsts = true;
105 |     return num_bad;
106 | }
107 | 
108 | int32 AmDiagGmm::NumGauss(DiagGmm* diaggmm) const
109 | {   // The number of components equals the number of mixture weights.
110 |     return static_cast<int32>(diaggmm->weights.size());
111 | }
112 | 
113 | int32 AmDiagGmm::Dim(DiagGmm* diaggmm) const
114 | {  // Feature dimension, taken as the column count of the means_invvars matrix.
115 |     return diaggmm->means_invvars.cols;
116 | }


--------------------------------------------------------------------------------
/src/common.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | 
  3 | void ReadToken(FILE *fp, char* s)
  4 | {
  5 |     // Copies bytes from fp into s up to the next space, which is consumed
  6 |     // but not stored.  Also stops when fread() fails (EOF or error); the
  7 |     // old loop ignored that and kept writing past the end of s forever.
  8 |     // s must have room for the token plus the terminating '\0'.
  9 |     int index = 0;
 10 |     char c = '\0';
 11 |     while(fread(&c, 1, 1, fp) == 1 && c != ' ')
 12 |     {
 13 |         s[index++] = c;
 14 |     }
 15 |     s[index] = '\0'; }
 16 | 
 17 | void ReadIntegerVector(FILE *fp, vector<int32> *v)
 18 | {
 19 |     // Expected layout: one byte holding sizeof(int32), a uint32 element
 20 |     // count, then that many int32 values, which are appended to *v.
 21 |     uint8 size = 0;
 22 |     if(fread(&size, sizeof(size), 1, fp) != 1 || size != sizeof(int32))
 23 |     {
 24 |         printf("vector size error!\n");
 25 |         return;
 26 |     }
 27 | 
 28 |     uint32 vsize = 0;
 29 |     if(fread(&vsize, sizeof(vsize), 1, fp) != 1) return;  // no count available
 30 |     int32 value;
 31 |     for(uint32 i=0; i<vsize; i++)  // unsigned counter to match vsize
 32 |     {
 33 |         // Stop at a truncated file instead of appending stale values.
 34 |         if(fread(&value, sizeof(value), 1, fp) != 1) break;
 35 |         v->push_back(value);
 36 |     }
 37 | }
 38 | 
 39 | void ReadBasicType(FILE *fp, int32 *t)
 40 | {
 41 |     // Reads a one-byte size tag, then the int32 itself.  *t is written only
 42 |     // when the whole value was read; a short read used to go unnoticed.
 43 |     uint8 size = 0;
 44 |     if(fread(&size, sizeof(size), 1, fp) != 1 || size != sizeof(int32))
 45 |     {
 46 |         printf("int32 size error!\n");
 47 |         return;
 48 |     }
 49 |     int32 value = 0;
 50 |     if(fread(&value, sizeof(value), 1, fp) == 1) *t = value;
 51 | }
 52 | 
 53 | void ReadBasicType(FILE *fp, BaseFloat *t)
 54 | {
 55 |     // Reads a one-byte size tag, then the float itself.  *t is written only
 56 |     // when the whole value was read; a short read used to go unnoticed.
 57 |     uint8 size = 0;
 58 |     if(fread(&size, sizeof(size), 1, fp) != 1 || size != sizeof(BaseFloat))
 59 |     {
 60 |         printf("float size error!\n");
 61 |         return;
 62 |     }
 63 |     BaseFloat value = 0;
 64 |     if(fread(&value, sizeof(value), 1, fp) == 1) *t = value;
 65 | }
 66 | 
 67 | void ReadFloatVectors(FILE *fp, vector<BaseFloat> *v)
 68 | {
 69 |     //TODO: Support other type, eg, double
 70 |     // Layout: a type token ("FV"), an int32 length, then the raw floats.
 71 |     char token[128];
 72 |     ReadToken(fp, token); //FV
 73 |     int32 size = 0;  // stays 0 if ReadBasicType() rejects the size tag
 74 |     ReadBasicType(fp, &size);
 75 |     v->resize(size > 0 ? size : 0);  // a corrupt negative length reads nothing
 76 |     if(!v->empty()) fread(v->data(), sizeof(BaseFloat), v->size(), fp);
 77 | }
 78 | 
 79 | void ReadFloatMatrix(FILE *fp, P_Matrix m)
 80 | {
 81 |     // Layout: a type token ("FM"), int32 rows, int32 cols, then rows*cols floats.
 82 |     char token[128];
 83 |     ReadToken(fp, token); //FM
 84 | 
 85 |     int32 rows = 0, cols = 0;  // stay 0 if ReadBasicType() rejects the size tag
 86 |     ReadBasicType(fp, &rows);
 87 |     ReadBasicType(fp, &cols);
 88 |     if(rows < 0 || cols < 0) rows = cols = 0;  // corrupt sizes: read nothing
 89 |     m->rows = rows;
 90 |     m->cols = cols;
 91 | 
 92 |     // stride = cols rounded up to a whole number of 16-byte groups of floats.
 93 |     const int32 per16 = 16 / sizeof(BaseFloat);
 94 |     m->stride = cols + (per16 - cols % per16) % per16;
 95 | 
 96 |     m->data.resize(static_cast<size_t>(rows) * cols);  // size_t: no int32 overflow
 97 |     if(!m->data.empty()) fread(m->data.data(), sizeof(BaseFloat), m->data.size(), fp);
 98 | }
 99 | 
100 | BaseFloat ReadMatrix(P_Matrix m, int32 row, int32 col)
101 | {   // Element (row, col) of a matrix stored row after row; indices are not checked.
102 |     return m->data[row*m->cols + col];
103 | }


--------------------------------------------------------------------------------
/src/compressed-matrix.cpp:
--------------------------------------------------------------------------------
  1 | #include "compressed-matrix.h"
  2 | 
  3 | void CompressedMatrix::Clear() {
  4 |   // Frees the buffer obtained from AllocateData() (a float array) and
  5 |   // marks the matrix as empty.  delete[] on a null pointer does nothing.
  6 |   delete [] static_cast<float*>(data_);
  7 |   data_ = NULL;
  8 | }
  9 | 
 10 | int32 CompressedMatrix::DataSize(const GlobalHeader &header) {  // total size in bytes of the compressed buffer:
 11 |   return sizeof(GlobalHeader) +  // one global header, then for every column
 12 |         header.num_cols * (sizeof(PerColHeader) + header.num_rows);  // a PerColHeader plus one byte per row
 13 | }
 14 | 
 15 | void* CompressedMatrix::AllocateData(int32 num_bytes) {  // returns a float array from new[]; Clear() releases it with delete[]
 16 |   return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);  // (num_bytes/3)+4 floats; with 4-byte floats that is more than num_bytes bytes
 17 | }
 18 | 
 19 | void CompressedMatrix::CopyFromMat(const P_Matrix mat)
 20 | {
 21 |     // Replaces the current contents with a compressed copy of *mat.
 22 |     // Buffer layout: GlobalHeader, then num_cols PerColHeaders, then one
 23 |     // byte per element stored column after column.
 24 |     Clear();
 25 | 
 26 |     GlobalHeader global_header;
 27 |     ComputeGlobalHeader(mat, &global_header);
 28 |     data_ = AllocateData(DataSize(global_header));
 29 |     *(reinterpret_cast<GlobalHeader*>(data_)) = global_header;
 30 | 
 31 |     PerColHeader *col_headers = reinterpret_cast<PerColHeader*>(
 32 |         static_cast<char*>(data_) + sizeof(GlobalHeader));
 33 |     uint8 *col_bytes =
 34 |         reinterpret_cast<uint8*>(col_headers + global_header.num_cols);
 35 | 
 36 |     const BaseFloat *matrix_data = mat->data.data();
 37 |     const int32 num_rows = global_header.num_rows;
 38 | 
 39 |     // Column `col` starts at matrix_data + col and its consecutive rows
 40 |     // are mat->cols elements apart.
 41 |     for (int32 col = 0; col < global_header.num_cols; col++) {
 42 |       CompressColumn(global_header,
 43 |                      matrix_data + col, mat->cols,
 44 |                      num_rows,
 45 |                      col_headers + col,
 46 |                      col_bytes + col * num_rows);
 47 |     }
 48 | }
 49 | 
 50 | void CompressedMatrix::CopyToMat(P_Matrix mat) const {
 51 |   // Decompresses the stored data into *mat, one row after another.
 52 |   if (data_ == NULL) return;  // nothing stored yet; *mat is left as it is
 53 |   GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
 54 |   int32 num_cols = h->num_cols, num_rows = h->num_rows;
 55 |   mat->rows = num_rows;
 56 |   mat->cols = num_cols;
 57 |   mat->data.resize(num_rows * num_cols);  // the old code assumed *mat already had this size
 58 |   PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
 59 |   uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header + num_cols);
 60 |   for (int32 i = 0; i < num_cols; i++, per_col_header++) {
 61 |     float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
 62 |         p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
 63 |         p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
 64 |         p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
 65 |     for (int32 j = 0; j < num_rows; j++, byte_data++)  // bytes are stored column by column
 66 |       mat->data[j*num_cols+i] = CharToFloat(p0, p25, p75, p100, *byte_data);
 67 |   }
 68 | }
 69 | 
 70 | void CompressedMatrix::ComputeGlobalHeader(const P_Matrix mat, GlobalHeader *header)
 71 | {   // Records the dimensions and the overall value range of *mat in *header.
 72 |     header->num_rows = mat->rows;
 73 |     header->num_cols = mat->cols;
 74 |     BaseFloat min_value, max_value;
 75 |     GetMinMax(mat, &min_value, &max_value);
 76 |     // range == 0 (a constant matrix) would make FloatToUint16() divide by zero.
 77 |     if(max_value == min_value) max_value = min_value + 1.0f + (min_value < 0 ? -min_value : min_value);
 78 |     header->min_value = min_value;
 79 |     header->range = max_value - min_value;
 80 | }
 81 | 
 82 | void CompressedMatrix::GetMinMax(const P_Matrix mat, BaseFloat* pMin, BaseFloat* pMax)
 83 | {
 84 |     // Smallest and largest of the first rows*cols elements of mat->data.
 85 |     const int32 total = mat->rows * mat->cols;
 86 | 
 87 |     // An empty matrix has no element 0 to start from; report 0 for both
 88 |     // instead of reading past the end of the vector as before.
 89 |     if(total <= 0 || mat->data.empty()) { *pMin = *pMax = 0; return; }
 90 | 
 91 |     BaseFloat lo = mat->data[0];
 92 |     BaseFloat hi = mat->data[0];
 93 |     for(int32 i=1; i<total; i++)
 94 |     {
 95 |         const BaseFloat x = mat->data[i];
 96 |         if(lo > x) lo = x;
 97 |         if(hi < x) hi = x;
 98 |     }
 99 |     *pMin = lo;
100 |     *pMax = hi;
101 | }
102 | 
103 | void CompressedMatrix::CompressColumn(
104 |     const GlobalHeader &global_header,
105 |     const BaseFloat *data, int32 stride,
106 |     int32 num_rows, PerColHeader *header,
107 |     uint8 *byte_data) {
108 |   // First fill *header with this column's 16-bit percentile codes ...
109 |   ComputeColHeader(global_header, data, stride, num_rows, header);
110 | 
111 |   // ... then decode those codes again, so that each element is quantised
112 |   // against the same values CopyToMat() obtains from the header.
113 |   const float p0 = Uint16ToFloat(global_header, header->percentile_0);
114 |   const float p25 = Uint16ToFloat(global_header, header->percentile_25);
115 |   const float p75 = Uint16ToFloat(global_header, header->percentile_75);
116 |   const float p100 = Uint16ToFloat(global_header, header->percentile_100);
117 | 
118 |   for (int32 row = 0; row < num_rows; row++)
119 |     byte_data[row] = FloatToChar(p0, p25, p75, p100, data[row * stride]);
120 | }
121 | 
122 | void CompressedMatrix::ComputeColHeader(
123 |     const GlobalHeader &global_header,
124 |     const BaseFloat *data, int32 stride,
125 |     int32 num_rows, PerColHeader *header) {
126 |   // Fills *header with the 16-bit codes of four order statistics of the
127 |   // column (minimum, two interior points, maximum).  The stored codes are
128 |   // kept strictly increasing.
129 |   if (num_rows <= 0) return;  // nothing to summarise; *header is left untouched
130 |   std::vector<BaseFloat> sdata(num_rows); // working copy, (partially) sorted below.
131 |   for (size_t i = 0, size = sdata.size(); i < size; i++)
132 |     sdata[i] = data[i*stride];
133 | 
134 |   // Positions in sdata of the 25%, 75% and 100% points.
135 |   int i25, i75, i100 = num_rows - 1;
136 |   if (num_rows >= 5) {
137 |     int quarter_nr = num_rows/4;
138 |     i25 = quarter_nr;
139 |     i75 = 3*quarter_nr;
140 |     // The elements at positions 0, i25, i75 and i100 must be the ones a
141 |     // full sort would put there; four nth_element calls achieve that.
142 |     std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
143 |     std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
144 |     std::nth_element(sdata.begin() + quarter_nr + 1,
145 |                      sdata.begin() + (3*quarter_nr), sdata.end());
146 |     std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
147 |                      sdata.end());
148 |   } else {
149 |     // Fewer than 5 rows: this case used to leave *header uninitialised.
150 |     // Sort everything and use the first values; a position repeated because
151 |     // there are too few rows yields "previous code + 1" via max() below.
152 |     std::sort(sdata.begin(), sdata.end());
153 |     i25 = std::min<int>(1, num_rows - 1);
154 |     i75 = std::min<int>(2, num_rows - 1);
155 |   }
156 | 
157 |   // Each code is at least one above the previous one and leaves room above.
158 |   header->percentile_0 =
159 |       std::min<uint16>(FloatToUint16(global_header, sdata[0]), 65532);
160 |   header->percentile_25 = std::min<uint16>(
161 |       std::max<uint16>(FloatToUint16(global_header, sdata[i25]),
162 |                        header->percentile_0 + static_cast<uint16>(1)), 65533);
163 |   header->percentile_75 = std::min<uint16>(
164 |       std::max<uint16>(FloatToUint16(global_header, sdata[i75]),
165 |                        header->percentile_25 + static_cast<uint16>(1)), 65534);
166 |   header->percentile_100 = std::max<uint16>(
167 |       FloatToUint16(global_header, sdata[i100]),
168 |       header->percentile_75 + static_cast<uint16>(1));
169 | }
170 | 
171 | inline uint16 CompressedMatrix::FloatToUint16(
172 |     const GlobalHeader &global_header,
173 |     float value) {
174 |   // Maps [min_value, min_value + range] linearly onto the codes 0..65535.
175 |   float f = (value - global_header.min_value) / global_header.range;
176 |   // Clamp to [0, 1]; values outside that interval should not happen.
177 |   if (f > 1.0) f = 1.0;
178 |   else if (f < 0.0) f = 0.0;
179 |   return static_cast<int>(f * 65535 + 0.499);  // + 0.499 rounds to the closest int
180 | }
181 | 
182 | inline uint8 CompressedMatrix::FloatToChar(
183 |     float p0, float p25, float p75, float p100,
184 |     float value) {
185 |   // Piecewise-linear quantisation of `value` to one byte:
186 |   //   [ p0,  p25 )  -> codes   0 ..  64
187 |   //   [ p25, p75 )  -> codes  64 .. 192
188 |   //   [ p75, p100 ] -> codes 192 .. 255  (63 steps, since codes stop at 255)
189 |   // `lo`/`hi` bound the segment containing `value`, `base` is the code of
190 |   // `lo` and `steps` is the number of code steps the segment spans.
191 |   float lo, hi;
192 |   int base, steps;
193 |   if (value < p25) {
194 |     lo = p0; hi = p25;
195 |     base = 0; steps = 64;
196 |   } else if (value < p75) {
197 |     lo = p25; hi = p75;
198 |     base = 64; steps = 128;
199 |   } else {
200 |     lo = p75; hi = p100;
201 |     base = 192; steps = 63;
202 |   }
203 | 
204 |   float f = (value - lo) / (hi - lo);
205 |   int ans = base + static_cast<int>(f * steps + 0.5);  // round to the closest int
206 |   // Clamp to the segment's own code range; needed in pathological cases
207 |   // where the percentile_* values are separated by one.
208 |   if (ans < base) ans = base;
209 |   if (ans > base + steps) ans = base + steps;
210 |   return static_cast<uint8>(ans);
211 | }
212 | 
213 | inline float CompressedMatrix::Uint16ToFloat(
214 |     const GlobalHeader &global_header,
215 |     uint16 value) {
216 |   // Decodes a 16-bit code as min_value + range * value / 65535; the constant 1.52590218966964e-05 is 1/65535.
217 |   return global_header.min_value
218 |       + global_header.range * 1.52590218966964e-05F * value;
219 | }
220 | 
221 | inline float CompressedMatrix::CharToFloat(
222 |     float p0, float p25, float p75, float p100,
223 |     uint8 value) {
224 |   // Inverse of FloatToChar(): linear interpolation inside the segment
225 |   // (0..64, 65..192 or 193..255) that the code belongs to.
226 |   if (value <= 64)
227 |     return p0 + (p25 - p0) * value * (1/64.0);
228 |   if (value <= 192)
229 |     return p25 + (p75 - p25) * (value - 64) * (1/128.0);
230 |   return p75 + (p100 - p75) * (value - 192) * (1/63.0);
231 | }
232 | 


--------------------------------------------------------------------------------
/src/compute-cmvn-stats.cpp:
--------------------------------------------------------------------------------
 1 | #include "compute-cmvn-stats.h"
 2 | 
 3 | void InitCmvnStats(int32 dim, P_MatrixDouble stats) {
 4 |     stats->rows = 2;      // row 0: sums (and the count in the last column); row 1: sums of squares
 5 |     stats->cols = dim+1;
 6 |     stats->data.assign(2*(dim+1), 0.0);  // assign() also zeroes existing elements; resize() kept old values
 7 | }
 8 | 
 9 | void AccCmvnStats(const BaseFloat* feats, int32 dim, BaseFloat weight, P_MatrixDouble stats) {
10 |   // Adds one weighted frame to the accumulators:
11 |   //   row 0, columns 0..dim-1 : sum of weight * x
12 |   //   row 0, column dim       : sum of the weights (the count)
13 |   //   row 1, columns 0..dim-1 : sum of weight * x * x
14 |   double *sums = stats->data.data();
15 |   double *sq_sums = sums + stats->cols;
16 | 
17 |   sums[dim] += weight;
18 |   for (int32 d = 0; d < dim; d++) {
19 |     const BaseFloat x = feats[d];
20 |     sums[d] += x * weight;
21 |     sq_sums[d] += x * x * weight;
22 |   }
23 | }
24 | 
25 | void AccCmvnStats(const P_Matrix feats, P_MatrixDouble stats) {
26 |   // Accumulates every row of feats into stats, each with weight 1.
27 |   // (The original tested the constant weight against zero; that test is
28 |   // always true, so every frame is accumulated.)
29 |   const BaseFloat unit_weight = 1.0;
30 |   const int32 num_frames = feats->rows;
31 |   for (int32 i = 0; i < num_frames; i++)
32 |     AccCmvnStats(feats->data.data() + i * feats->cols, feats->cols, unit_weight, stats);
33 | }
34 | 
35 | void ApplyCmvn(const P_MatrixDouble stats,
36 |                bool var_norm,
37 |                P_Matrix feats) {
38 |   // stats layout (as filled by AccCmvnStats): row 0 holds the per-dimension
39 |   // sums followed by the frame count, row 1 the per-dimension sums of
40 |   // squares.  feats is normalised in place; columns beyond `dim` (if any)
41 |   // are left untouched instead of indexing past the statistics.
42 |   int32 dim = stats->cols - 1;
43 | 
44 |   double count = stats->data[dim];
45 | 
46 |   if (!var_norm) {
47 |     // Mean normalisation only: subtract the per-dimension mean.
48 |     vector<BaseFloat> offset(dim);
49 |     for(int32 i=0; i<dim; i++)
50 |     {
51 |         offset[i] = -stats->data[i] / stats->data[dim];
52 |     }
53 |     for(int32 i=0; i<feats->rows; i++)
54 |         for(int32 j=0; j<feats->cols && j<dim; j++)
55 |             feats->data[i*feats->cols+j] += offset[j];
56 |     return;
57 |   }
58 |   // norm(0, d) = mean offset;
59 |   // norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
60 |   Matrix norm;
61 |   norm.rows = 2;
62 |   norm.cols = dim;
63 |   norm.data.resize(2*dim);
64 |   for (int32 d = 0; d < dim; d++) {
65 |     double mean = stats->data[d]/count;
66 |     double var = (stats->data[1*stats->cols + d]/count) - mean*mean,
67 |         floor = 1.0e-20;
68 |     if (var < floor) var = floor;  // keeps sqrt() and the division finite
69 |     double scale = 1.0 / sqrt(var);
70 |     norm.data[d] = -(mean*scale);
71 |     norm.data[1*norm.cols+d] = scale;
72 |   }
73 |   // Apply the normalization.  This step was commented out before, so
74 |   // var_norm == true computed `norm` and then left feats unchanged.
75 |   for (int32 i = 0; i < feats->rows; i++)
76 |     for (int32 j = 0; j < feats->cols && j < dim; j++)
77 |       feats->data[i*feats->cols+j] = feats->data[i*feats->cols+j]*norm.data[norm.cols+j] + norm.data[j]; }
78 | 


--------------------------------------------------------------------------------
/src/decode.cpp:
--------------------------------------------------------------------------------
 1 | #include "wavereader.h"
 2 | #include "transition-model.h"
 3 | #include "am-diag-gmm.h"
 4 | #include "fstreader.h"
 5 | #include "feature-mfcc.h"
 6 | #include "compressed-matrix.h"
 7 | #include "compute-cmvn-stats.h"
 8 | #include "add-deltas.h"
 9 | #include "simple-decoder.h"
10 | 
11 | int main(int argc, char* argv[])
12 | {
13 |     if(argc < 4)
14 |     {
15 |         printf("arg error\n");
16 |         return -1;
17 |     }
18 | 
19 |     char* mdlFileName = argv[1];
20 |     char* fstFileName = argv[2];
21 |     char* waveFileName = argv[3];
22 | 
23 |     BaseFloat vtln_warp = 1.0;
24 | 
25 |     BaseFloat acoustic_scale = 0.083333;
26 |     BaseFloat beam = 16.0;
27 | 
28 |     // Read Transition model and GMM AM model
29 |     FILE *fpMdl = fopen(mdlFileName, "rb");
30 | 
31 |     bool binary = false;
32 |     char hdr[2];
33 |     fread(hdr, 2, 1, fpMdl);
34 |     if(hdr[1] == 'B')
35 |     {
36 |         binary = true;
37 |     }
38 |     TransitionModel trans_model;
39 |     trans_model.Read(fpMdl);
40 | 
41 |     AmDiagGmm am_gmm;
42 |     am_gmm.Read(fpMdl);
43 | 
44 |     fclose(fpMdl);
45 | 
46 |     // Read HCLG fst
47 |     FstReader fstReader;
48 |     fstReader.Read(fstFileName);
49 | 
50 |     // Read wave file
51 |     WaveReader waveReader;
52 |     waveReader.ReadWaveFile(waveFileName);
53 | 
54 |     // Compute MFCC
55 |     MfccComputer mfccComputer;
56 |     Matrix feats;
57 |     mfccComputer.ComputeFeatures(waveReader.m_waveData, waveReader.m_wavefile.header.sample_rate, vtln_warp, &feats);
58 | 
59 |     // Compress matrix
60 |     CompressedMatrix compressedMatrix;
61 |     compressedMatrix.CopyFromMat(&feats);
62 |     compressedMatrix.CopyToMat(&feats);
63 | 
64 |     // Compute CMVN stats and apply
65 |     MatrixDouble cmvn_stats;
66 |     InitCmvnStats(feats.cols, &cmvn_stats);
67 |     AccCmvnStats(&feats, &cmvn_stats);
68 |     ApplyCmvn(&cmvn_stats, false, &feats);
69 | 
70 |     // Add deltas
71 |     DeltaFeaturesOptions opts;
72 |     Matrix feature;
73 |     ComputeDeltas(opts, &feats, &feature);
74 | 
75 |     // Decode feature
76 |     SimpleDecoder decoder(&trans_model, &am_gmm, &fstReader, beam);
77 |     decoder.Decode(&feature, acoustic_scale);
78 | 
79 |     vector<int> result = decoder.GetBestPath();
80 | 
81 |     printf("Decoded result: ");
82 |     for(int i=0; i<result.size(); i++)
83 |     {
84 |         printf("%d ", result[i]);
85 |     }
86 |     printf("\n");
87 | 
88 |     return 0;
89 | }


--------------------------------------------------------------------------------
/src/feature-mfcc.cpp:
--------------------------------------------------------------------------------
  1 | #include "feature-mfcc.h"
  2 | 
  3 | int32 RoundUpToNearestPowerOfTwo(int32 n) {
  4 |         n--;
  5 |         n |= n >> 1;
  6 |         n |= n >> 2;
  7 |         n |= n >> 4;
  8 |         n |= n >> 8;
  9 |         n |= n >> 16;
 10 |         return n+1;
 11 | }
 12 | 
 13 | int Rand(struct RandomState* state) {
 14 |     if (state) {
 15 |         return rand_r(&(state->seed));
 16 |     } else {
 17 |         return rand();
 18 |     }
 19 | }
 20 | 
 21 | RandomState::RandomState() {
 22 |   // we initialize it as Rand() + 27437 instead of just Rand(), because on some
 23 |   // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be
 24 |   // the case that rand_r when initialized with rand() will give you the exact
 25 |   // same sequence of numbers that rand() will give if you keep calling rand()
 26 |   // after that initial call.  This can cause problems with repeated sequences.
 27 |   // For example if you initialize two RandomState structs one after the other
 28 |   // without calling rand() in between, they would give you the same sequence
 29 |   // offset by one (if we didn't have the "+ 27437" in the code).  27437 is just
 30 |   // a randomly chosen prime number.
 31 |   seed = unsigned(Rand(NULL)) + 27437;
 32 | }
 33 | 
 34 | /// Returns a random number strictly between 0 and 1.
 35 | inline float RandUniform(struct RandomState* state = NULL) {
 36 |   return static_cast<float>((Rand(state) + 1.0) / (RAND_MAX+2.0));
 37 | }
 38 | 
 39 | inline float RandGauss(struct RandomState* state = NULL) {
 40 |   return static_cast<float>(sqrtf (-2 * logf(RandUniform(state)))
 41 |                             * cosf(2*M_PI*RandUniform(state)));
 42 | }
 43 | 
 44 | void Dither(vector<BaseFloat> &waveform, int32 frame_length, BaseFloat dither_value) {
 45 |     if (dither_value == 0.0)
 46 |     {
 47 |         return;
 48 |     }
 49 |     BaseFloat *data = waveform.data();
 50 |     RandomState rstate;
 51 |     for (int32 i = 0; i < frame_length; i++)
 52 |     {
 53 |         data[i] += RandGauss(&rstate) * dither_value;
 54 |     }
 55 | }
 56 | 
 57 | BaseFloat Sum(vector<BaseFloat> window)
 58 | {
 59 |     BaseFloat sum = 0.0f;
 60 | 
 61 |     for(int i=0; i<window.size(); i++)
 62 |     {
 63 |         sum += window[i];
 64 |     }
 65 | 
 66 |     return sum;
 67 | }
 68 | 
 69 | void Preemphasize(vector<BaseFloat> &waveform, int32 frame_length, BaseFloat preemph_coeff)
 70 | {
 71 |     if (preemph_coeff == 0.0)
 72 |     {
 73 |         return;
 74 |     }
 75 |     for (int32 i = frame_length-1; i > 0; i--)
 76 |     {
 77 |         waveform[i] -= preemph_coeff * waveform[i-1];
 78 |     }
 79 | 
 80 |     waveform[0] -= preemph_coeff * waveform[0];
 81 | }
 82 | 
 83 | void ComputePowerSpectrum(vector<BaseFloat> &waveform) {
 84 |   int32 dim = waveform.size();
 85 | 
 86 |   // no, letting it be non-power-of-two for now.
 87 |   // KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0));  // make sure a power of two.. actually my FFT code
 88 |   // does not require this (dan) but this is better in case we use different code [dan].
 89 | 
 90 |   // RealFft(waveform, true);  // true == forward (not inverse) FFT; makes no difference here,
 91 |   // as we just want power spectrum.
 92 | 
 93 |   // now we have in waveform, first half of complex spectrum
 94 |   // it's stored as [real0, realN/2, real1, im1, real2, im2, ...]
 95 |   int32 half_dim = dim/2;
 96 |   BaseFloat first_energy = waveform[0] * waveform[0],
 97 |       last_energy = waveform[1] * waveform[1];  // handle this special case
 98 |   for (int32 i = 1; i < half_dim; i++) {
 99 |     BaseFloat real = waveform[i*2], im = waveform[i*2 + 1];
100 |     waveform[i] = real*real + im*im;
101 |   }
102 |   waveform[0] = first_energy;
103 |   waveform[half_dim] = last_energy;  // Will actually never be used, and anyway
104 |   // if the signal has been bandlimited sensibly this should be zero.
105 | }
106 | 
// Raises every element of `v` that is below `floor_val` up to `floor_val`;
// all other elements are left as they are.
void ApplyFloor(vector<BaseFloat> &v, BaseFloat floor_val)
{
    for (vector<BaseFloat>::iterator it = v.begin(); it != v.end(); ++it) {
      if (*it < floor_val) {
        *it = floor_val;
      }
    }
}
113 | 
// Replaces every element of `v` by its natural logarithm (single precision).
void ApplyLog(vector<BaseFloat> &v)
{
    for (vector<BaseFloat>::iterator it = v.begin(); it != v.end(); ++it) {
      *it = logf(*it);
    }
}
120 | 
121 | void PrepareMatrix(P_Matrix m, int32 rows, int32 cols)
122 | {
123 |     m->rows = rows;
124 |     m->cols = cols;
125 | 
126 |     m->data.resize(rows * cols);
127 | }
128 | 
129 | void ComputeDctMatrix(P_Matrix M) {
130 |   //KALDI_ASSERT(M->NumRows() == M->NumCols());
131 |   int32 K = M->rows;
132 |   int32 N = M->cols;
133 | 
134 |   BaseFloat normalizer = sqrt(1.0 / static_cast<BaseFloat>(N));  // normalizer for
135 |   // X_0.
136 |   for (int32 j = 0; j < N; j++) M->data[0*M->cols + j] = normalizer;
137 |   normalizer = sqrt(2.0 / static_cast<BaseFloat>(N));  // normalizer for other
138 |    // elements.
139 |   for (int32 k = 1; k < K; k++)
140 |     for (int32 n = 0; n < N; n++)
141 |       M->data[k*M->cols + n] = normalizer
142 |           * cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
143 | }
144 | 
// Fills `coeffs` (already sized by the caller) with liftering coefficients,
// i.e. per-coefficient scaling factors for cepstral features:
//   coeffs[i] = 1 + 0.5 * Q * sin(pi * i / Q).
// The numbering differs slightly from HTK: index zero is C0, whose factor is
// exactly one and which is therefore not affected.
void ComputeLifterCoeffs(BaseFloat Q, vector<BaseFloat> &coeffs) {
  int32 i = 0;
  for (vector<BaseFloat>::iterator out = coeffs.begin(); out != coeffs.end(); ++out, ++i) {
    *out = 1.0 + 0.5 * Q * sin (M_PI * i / Q);
  }
}
152 | 
153 | void PrepareFeatureWindowFunction(FrameExtractionOptions &opts) {
154 |   int32 frame_length = opts.WindowSize();
155 |   opts.window.resize(frame_length);
156 |   double a = M_2PI / (frame_length-1);
157 |   for (int32 i = 0; i < frame_length; i++) {
158 |     double i_fl = static_cast<double>(i);
159 |     if (opts.window_type == hanning) {
160 |       opts.window[i] = 0.5  - 0.5*cos(a * i_fl);
161 |     } else if (opts.window_type == sine) {
162 |       // when you are checking ws wikipedia, please
163 |       // note that 0.5 * a = M_PI/(frame_length-1)
164 |       opts.window[i] = sin(0.5 * a * i_fl);
165 |     } else if (opts.window_type == hamming) {
166 |       opts.window[i] = 0.54 - 0.46*cos(a * i_fl);
167 |     } else if (opts.window_type == povey) {  // like hamming but goes to zero at edges.
168 |       opts.window[i] = pow(0.5 - 0.5*cos(a * i_fl), 0.85);
169 |     } else if (opts.window_type == rectangular) {
170 |       opts.window[i] = 1.0;
171 |     } else if (opts.window_type == blackman) {
172 |       opts.window[i] = opts.blackman_coeff - 0.5*cos(a * i_fl) +
173 |         (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl);
174 |     }
175 |   }
176 | }
177 | 
178 | MelBanks::MelBanks(const MelBanksOptions &opts,
179 |                    const FrameExtractionOptions &frame_opts,
180 |                    BaseFloat vtln_warp_factor) {
181 |   int32 num_bins = opts.num_bins;
182 |   BaseFloat sample_freq = frame_opts.samp_freq;
183 |   int32 window_length_padded = frame_opts.PaddedWindowSize();
184 |   int32 num_fft_bins = window_length_padded / 2;
185 |   BaseFloat nyquist = 0.5 * sample_freq;
186 | 
187 |   BaseFloat low_freq = opts.low_freq, high_freq;
188 |   if (opts.high_freq > 0.0)
189 |     high_freq = opts.high_freq;
190 |   else
191 |     high_freq = nyquist + opts.high_freq;
192 | 
193 |   BaseFloat fft_bin_width = sample_freq / window_length_padded;
194 |   // fft-bin width [think of it as Nyquist-freq / half-window-length]
195 | 
196 |   BaseFloat mel_low_freq = MelScale(low_freq);
197 |   BaseFloat mel_high_freq = MelScale(high_freq);
198 | 
199 |   // divide by num_bins+1 in next line because of end-effects where the bins
200 |   // spread out to the sides.
201 |   BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
202 | 
203 |   BaseFloat vtln_low = opts.vtln_low,
204 |       vtln_high = opts.vtln_high;
205 |   if (vtln_high < 0.0) {
206 |     vtln_high += nyquist;
207 |   }
208 | 
209 |   bins_.resize(num_bins);
210 |   center_freqs_.resize(num_bins);
211 | 
212 |   for (int32 bin = 0; bin < num_bins; bin++) {
213 |     BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
214 |         center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
215 |         right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
216 | 
217 |     if (vtln_warp_factor != 1.0) {
218 |       left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
219 |                                  vtln_warp_factor, left_mel);
220 |       center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
221 |                                  vtln_warp_factor, center_mel);
222 |       right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
223 |                                   vtln_warp_factor, right_mel);
224 |     }
225 |     center_freqs_[bin] = InverseMelScale(center_mel);
226 |     // this_bin will be a vector of coefficients that is only
227 |     // nonzero where this mel bin is active.
228 |     vector<BaseFloat> this_bin(num_fft_bins);
229 |     int32 first_index = -1, last_index = -1;
230 |     for (int32 i = 0; i < num_fft_bins; i++) {
231 |       BaseFloat freq = (fft_bin_width * i);  // Center frequency of this fft
232 |                                              // bin.
233 |       BaseFloat mel = MelScale(freq);
234 |       if (mel > left_mel && mel < right_mel) {
235 |         BaseFloat weight;
236 |         if (mel <= center_mel)
237 |           weight = (mel - left_mel) / (center_mel - left_mel);
238 |         else
239 |          weight = (right_mel-mel) / (right_mel-center_mel);
240 |         this_bin[i] = weight;
241 |         if (first_index == -1)
242 |           first_index = i;
243 |         last_index = i;
244 |       }
245 |     }
246 | 
247 |     bins_[bin].first = first_index;
248 |     int32 size = last_index + 1 - first_index;
249 |     bins_[bin].second.resize(size);
250 |     for(int32 i=0; i<size; i++)
251 |     {
252 |         bins_[bin].second[i] = this_bin[first_index+i];
253 |     }
254 |   }
255 | }
256 | 
257 | BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff,  // upper+lower frequency cutoffs for VTLN.
258 |                                  BaseFloat vtln_high_cutoff,
259 |                                  BaseFloat low_freq,  // upper+lower frequency cutoffs in mel computation
260 |                                  BaseFloat high_freq,
261 |                                  BaseFloat vtln_warp_factor,
262 |                                  BaseFloat freq) {
263 |   /// This computes a VTLN warping function that is not the same as HTK's one,
264 |   /// but has similar inputs (this function has the advantage of never producing
265 |   /// empty bins).
266 | 
267 |   /// This function computes a warp function F(freq), defined between low_freq and
268 |   /// high_freq inclusive, with the following properties:
269 |   ///  F(low_freq) == low_freq
270 |   ///  F(high_freq) == high_freq
271 |   /// The function is continuous and piecewise linear with two inflection
272 |   ///   points.
273 |   /// The lower inflection point (measured in terms of the unwarped
274 |   ///  frequency) is at frequency l, determined as described below.
275 |   /// The higher inflection point is at a frequency h, determined as
276 |   ///   described below.
277 |   /// If l <= f <= h, then F(f) = f/vtln_warp_factor.
278 |   /// If the higher inflection point (measured in terms of the unwarped
279 |   ///   frequency) is at h, then max(h, F(h)) == vtln_high_cutoff.
280 |   ///   Since (by the last point) F(h) == h/vtln_warp_factor, then
281 |   ///   max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
282 |   ///   h = vtln_high_cutoff / max(1, 1/vtln_warp_factor).
283 |   ///     = vtln_high_cutoff * min(1, vtln_warp_factor).
284 |   /// If the lower inflection point (measured in terms of the unwarped
285 |   ///   frequency) is at l, then min(l, F(l)) == vtln_low_cutoff
286 |   ///   This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
287 |   ///                       = vtln_low_cutoff * max(1, vtln_warp_factor)
288 | 
289 | 
290 |   if (freq < low_freq || freq > high_freq) return freq;  // in case this gets called
291 |   // for out-of-range frequencies, just return the freq.
292 | 
293 |   BaseFloat one = 1.0;
294 |   BaseFloat l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
295 |   BaseFloat h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
296 |   BaseFloat scale = 1.0 / vtln_warp_factor;
297 |   BaseFloat Fl = scale * l;  // F(l);
298 |   BaseFloat Fh = scale * h;  // F(h);
299 |   // slope of left part of the 3-piece linear function
300 |   BaseFloat scale_left = (Fl - low_freq) / (l - low_freq);
301 |   // [slope of center part is just "scale"]
302 | 
303 |   // slope of right part of the 3-piece linear function
304 |   BaseFloat scale_right = (high_freq - Fh) / (high_freq - h);
305 | 
306 |   if (freq < l) {
307 |     return low_freq + scale_left * (freq - low_freq);
308 |   } else if (freq < h) {
309 |     return scale * freq;
310 |   } else {  // freq >= h
311 |     return high_freq + scale_right * (freq - high_freq);
312 |   }
313 | }
314 | 
315 | BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,  // upper+lower frequency cutoffs for VTLN.
316 |                                     BaseFloat vtln_high_cutoff,
317 |                                     BaseFloat low_freq,  // upper+lower frequency cutoffs in mel computation
318 |                                     BaseFloat high_freq,
319 |                                     BaseFloat vtln_warp_factor,
320 |                                     BaseFloat mel_freq) {
321 |   return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
322 |                                low_freq, high_freq,
323 |                                vtln_warp_factor, InverseMelScale(mel_freq)));
324 | }
325 | 
326 | 
327 | // "power_spectrum" contains fft energies.
328 | void MelBanks::Compute(const vector<BaseFloat> &power_spectrum,
329 |                        vector<BaseFloat> &mel_energies_out) const {
330 |   int32 num_bins = bins_.size();
331 | 
332 |   for (int32 i = 0; i < num_bins; i++) {
333 |     int32 offset = bins_[i].first;
334 |     const vector<BaseFloat> &v(bins_[i].second);
335 |     BaseFloat energy = 0.0f;
336 |     for(int32 j=0; j<v.size(); j++)
337 |     {
338 |         energy += v[j] * power_spectrum[offset+j];
339 |     }
340 |     mel_energies_out[i] = energy;
341 |   }
342 | }
343 | 
344 | MfccComputer::MfccComputer()
345 | {
346 |     PrepareFeatureWindowFunction(frameOptions);
347 |     srfft = new SplitRadixRealFft(256);
348 | 
349 |     GetMelBanks(1.0);
350 |     mel_energies_.resize(mfccOptions.mel_opts.num_bins);
351 | 
352 |     int32 num_bins = mfccOptions.mel_opts.num_bins;
353 |     Matrix tmp_dct_matrix;
354 |     PrepareMatrix(&tmp_dct_matrix, num_bins, num_bins);
355 |     ComputeDctMatrix(&tmp_dct_matrix);
356 |     PrepareMatrix(&dct_matrix, mfccOptions.num_ceps, num_bins);
357 |     for(int32 i=0; i<dct_matrix.rows; i++)
358 |     {
359 |         for(int32 j=0; j<dct_matrix.cols; j++)
360 |         {
361 |             dct_matrix.data[i*dct_matrix.cols + j] = tmp_dct_matrix.data[i*tmp_dct_matrix.cols + j];
362 |         }
363 |     }
364 | 
365 |     if (mfccOptions.cepstral_lifter != 0.0) {
366 |         lifter_coeffs_.resize(mfccOptions.num_ceps);
367 |         ComputeLifterCoeffs(mfccOptions.cepstral_lifter, lifter_coeffs_);
368 |     }
369 | }
370 | 
371 | MfccComputer::~MfccComputer()
372 | {
373 |     delete srfft;
374 | }
375 | 
376 | void MfccComputer::ComputeFeatures(const vector<BaseFloat> &wave, BaseFloat sample_freq, BaseFloat vtln_warp, P_Matrix output)
377 | {
378 |     int32 rows_out = NumFrames(wave.size());
379 |     int32 cols_out = mfccOptions.num_ceps;
380 | 
381 |     output->rows = rows_out;
382 |     output->cols = cols_out;
383 | 
384 |     int32 skip = ((16 / sizeof(BaseFloat)) - cols_out % (16 / sizeof(BaseFloat))) % (16 / sizeof(BaseFloat));
385 |     output->stride = cols_out + skip;
386 | 
387 |     output->data.resize(rows_out * cols_out);
388 | 
389 |     vector<BaseFloat> window;  // windowed waveform.
390 |     for (int32 frame = 0; frame < rows_out; ++frame)
391 |     {
392 |         ExtractWindow(wave, frame, vtln_warp, window, output->data.data()+frame*cols_out);
393 |     }
394 | }
395 | 
396 | int32 MfccComputer::NumFrames(int64 num_samples)
397 | {
398 |     int64 frame_shift = frameOptions.WindowShift();
399 |     int64 frame_length = frameOptions.WindowSize();
400 | 
401 |     if (num_samples < frame_length)
402 |     {
403 |         return 0;
404 |     }
405 |     else
406 |     {
407 |         return (1 + ((num_samples - frame_length) / frame_shift));
408 |     }
409 | }
410 | 
411 | void MfccComputer::ExtractWindow(const vector<BaseFloat> &wave, int32 f, BaseFloat vtln_warp, vector<BaseFloat> &window, BaseFloat* output)
412 | {
413 |     int32 frame_length = frameOptions.WindowSize();
414 |     int32 frame_length_padded = frameOptions.PaddedWindowSize();
415 | 
416 |     if(window.size() != frame_length_padded)
417 |     {
418 |         window.resize(frame_length_padded);
419 |     }
420 | 
421 |     memcpy(window.data(), wave.data()+f*frameOptions.WindowShift(), frame_length*sizeof(BaseFloat));
422 |     memset(window.data()+frame_length, 0, (frame_length_padded-frame_length)*sizeof(BaseFloat));
423 | 
424 |     ProcessWindow(window, vtln_warp, output);
425 | }
426 | 
427 | void MfccComputer::ProcessWindow(vector<BaseFloat> window, BaseFloat vtln_warp, BaseFloat* output)
428 | {
429 |     int32 frame_length = frameOptions.WindowSize();
430 | 
431 |     if (frameOptions.dither != 0.0)
432 |     {
433 |         Dither(window, frame_length, frameOptions.dither);
434 |     }
435 | 
436 |     if (frameOptions.remove_dc_offset)
437 |     {
438 |         BaseFloat offset = -Sum(window) / frame_length;
439 |         for(int i=0; i<frame_length; i++)
440 |         {
441 |             window[i] += offset;
442 |         }
443 |     }
444 | 
445 |     if (frameOptions.preemph_coeff != 0.0)
446 |     {
447 |         Preemphasize(window, frame_length, frameOptions.preemph_coeff);
448 |     }
449 | 
450 |     for(int i=0; i<frame_length; i++)
451 |     {
452 |         window[i] *= frameOptions.window[i];
453 |     }
454 | 
455 |     srfft->Compute(window.data(), true);
456 | 
457 |     ComputePowerSpectrum(window);
458 | 
459 |     const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
460 |     mel_banks.Compute(window, mel_energies_);
461 | 
462 |     ApplyFloor(mel_energies_, std::numeric_limits<float>::epsilon());
463 |     ApplyLog(mel_energies_);
464 | 
465 |     for(int32 i=0; i<mfccOptions.num_ceps; i++)
466 |     {
467 |         output[i] = 0.0f;
468 |         for(int32 j=0; j<mel_energies_.size(); j++)
469 |         {
470 |             output[i] += mel_energies_[j] * dct_matrix.data[i*dct_matrix.cols + j];
471 |         }
472 |     }
473 | 
474 |     if (mfccOptions.cepstral_lifter != 0.0)
475 |     {
476 |         for(int32 i=0; i<mfccOptions.num_ceps; i++)
477 |         {
478 |             output[i] *= lifter_coeffs_[i];
479 |         }
480 |     }
481 | }
482 | 
483 | const MelBanks *MfccComputer::GetMelBanks(BaseFloat vtln_warp) {
484 |   MelBanks *this_mel_banks = NULL;
485 |   std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
486 |   if (iter == mel_banks_.end()) {
487 |     this_mel_banks = new MelBanks(mfccOptions.mel_opts,
488 |                                   frameOptions,
489 |                                   vtln_warp);
490 |     mel_banks_[vtln_warp] = this_mel_banks;
491 |   } else {
492 |     this_mel_banks = iter->second;
493 |   }
494 |   return this_mel_banks;
495 | }


--------------------------------------------------------------------------------
/src/fstreader.cpp:
--------------------------------------------------------------------------------
  1 | #include "fstreader.h"
  2 | 
  3 | const int32 fstMagicNumber = 2125659606;
  4 | 
  5 | bool FstHeader::Read(const char* fileName)
  6 | {
  7 |     FILE *fp;
  8 |     fp = fopen(fileName, "rb");
  9 | 
 10 |     if(fp == NULL)
 11 |     {
 12 |         printf("Error opening fst file\n");
 13 |         return false;
 14 |     }
 15 | 
 16 |     int32 magic_number = 0;
 17 |     ReadInt(&magic_number, sizeof(magic_number), fp);
 18 |     if (magic_number != fstMagicNumber) {
 19 |         printf("FstHeader::Read: Bad FST header\n");
 20 |         return false;
 21 |     }
 22 | 
 23 |     ReadString(&fsttype, fp);
 24 |     ReadString(&arctype, fp);
 25 |     ReadInt(&version, sizeof(version), fp);
 26 |     ReadInt(&flags, sizeof(flags), fp);
 27 |     ReadInt(&properties, sizeof(properties), fp);
 28 |     ReadInt(&start, sizeof(start), fp);
 29 |     ReadInt(&numstates, sizeof(numstates), fp);
 30 |     ReadInt(&numarcs, sizeof(numarcs), fp);
 31 | 
 32 |     fclose(fp);
 33 |     return true;
 34 | }
 35 | 
 36 | void FstHeader::ReadString(char **buf, FILE *fp)
 37 | {
 38 |     uint32 len = 0;
 39 |     fread(&len, sizeof(len), 1, fp);
 40 |     *buf = (char*)malloc(len+1);
 41 |     memset(*buf, 0, len+1);
 42 |     fread(*buf, len, 1, fp);
 43 | }
 44 | 
 45 | void FstHeader::ReadInt(void *buf, int bytes, FILE *fp)
 46 | {
 47 |     fread(buf, bytes, 1, fp);
 48 | }
 49 | 
 50 | FstHeader::~FstHeader()
 51 | {
 52 |     SAFE_FREE(fsttype);
 53 |     SAFE_FREE(arctype);
 54 | }
 55 | 
 56 | 
 57 | bool FstReader::Read(const char* fileName)
 58 | {
 59 |     if(!hdr.Read(fileName))
 60 |     {
 61 |         return false;
 62 |     }
 63 | 
 64 |     FILE *fp;
 65 |     fp = fopen(fileName, "rb");
 66 | 
 67 |     if(fp == NULL)
 68 |     {
 69 |         printf("Error opening fst file\n");
 70 |         return false;
 71 |     }
 72 | 
 73 |     //65 bytes header
 74 |     fseek(fp, 65, SEEK_SET);
 75 |     //Check the type of Arc
 76 | 
 77 |     //Read the FST
 78 |     //20 bytes per state
 79 |     state = (P_State)malloc(hdr.numstates * sizeof(State));
 80 |     for(int64 i=0; i<hdr.numstates; i++)
 81 |     {
 82 |         fread(&state[i], 20, 1, fp);
 83 |     }
 84 | 
 85 |     //16 bytes per arc
 86 |     arc = (P_Arc)malloc(hdr.numarcs * sizeof(Arc));
 87 |     fread(arc, sizeof(Arc), hdr.numarcs, fp);
 88 | 
 89 |     //Assign arcs to states
 90 |     int64 offset = 0;
 91 |     for(int64 i=0; i<hdr.numstates; i++)
 92 |     {
 93 |         state[i].arc = &arc[offset];
 94 |         offset += state[i].arcNum;
 95 |     }
 96 | 
 97 |     fclose(fp);
 98 |     return true;
 99 | }
100 | 
101 | int FstReader::Start()
102 | {
103 |     return hdr.start;
104 | }
105 | 
106 | FstReader::~FstReader()
107 | {
108 |     SAFE_FREE(state);
109 |     SAFE_FREE(arc);
110 | }


--------------------------------------------------------------------------------
/src/simple-decoder.cpp:
--------------------------------------------------------------------------------
  1 | #include "simple-decoder.h"
  2 | 
  3 | static void TokenDelete(Token *tok)
  4 | {
  5 |     while(--tok->ref_count == 0)
  6 |     {
  7 |         Token *prev = tok->prev;
  8 |         SAFE_FREE(tok);
  9 |         if(prev == NULL)
 10 |         {
 11 |             return;
 12 |         }
 13 |         else tok = prev;
 14 |     }
 15 | }
 16 | 
 17 | static BaseFloat GetDecodeArcWeight(P_DecodeArc arc)
 18 | {
 19 |     return arc->weight1 + arc->weight2;
 20 | }
 21 | 
 22 | static P_Token newToken(P_DecodeArc arc, BaseFloat acoustic_cost, Token *prev)
 23 | {
 24 |     P_Token token = (P_Token)malloc(sizeof(Token));
 25 | 
 26 |     token->arc.ilabel = arc->ilabel;
 27 |     token->arc.olabel = arc->olabel;
 28 |     token->arc.weight1 = GetDecodeArcWeight(arc);
 29 |     token->arc.weight2 = acoustic_cost;
 30 |     token->arc.nextstate = arc->nextstate;
 31 | 
 32 |     token->prev = prev;
 33 |     token->ref_count = 1;
 34 | 
 35 |     if(prev)
 36 |     {
 37 |         prev->ref_count++;
 38 |         token->cost = prev->cost + (GetDecodeArcWeight(arc) + acoustic_cost);
 39 |     }
 40 |     else
 41 |     {
 42 |         token->cost = GetDecodeArcWeight(arc) + acoustic_cost;
 43 |     }
 44 | 
 45 |     return token;
 46 | }
 47 | 
 48 | static P_Token newToken(P_Arc arc, BaseFloat acoustic_cost, Token *prev)
 49 | {
 50 |     P_Token token = (P_Token)malloc(sizeof(Token));
 51 | 
 52 |     token->arc.ilabel = arc->ilabel;
 53 |     token->arc.olabel = arc->olabel;
 54 |     token->arc.weight1 = arc->weight;
 55 |     token->arc.weight2 = acoustic_cost;
 56 |     token->arc.nextstate = arc->nextstate;
 57 | 
 58 |     token->prev = prev;
 59 |     token->ref_count = 1;
 60 | 
 61 |     if(prev)
 62 |     {
 63 |         prev->ref_count++;
 64 |         token->cost = prev->cost + (arc->weight + acoustic_cost);
 65 |     }
 66 |     else
 67 |     {
 68 |         token->cost = arc->weight + acoustic_cost;
 69 |     }
 70 | 
 71 |     return token;
 72 | }
 73 | 
 74 | SimpleDecoder::SimpleDecoder(TransitionModel *transmodel, AmDiagGmm *amgmm, FstReader *fst, BaseFloat beam)
 75 | {
 76 |     m_transmodel = transmodel;
 77 |     m_amgmm = amgmm;
 78 |     m_fst = fst;
 79 |     m_beam = beam;
 80 | }
 81 | 
 82 | void SimpleDecoder::InitDecoding()
 83 | {
 84 |     // clean up from last time:
 85 |     ClearToks(cur_toks);
 86 |     ClearToks(prev_toks);
 87 |     // initialize decoding:
 88 |     StateId start_state = m_fst->Start();
 89 | 
 90 |     DecodeArc dummy_arc;
 91 |     dummy_arc.ilabel = 0;
 92 |     dummy_arc.olabel = 0;
 93 |     dummy_arc.weight1 = 0;
 94 |     dummy_arc.weight2 = 0;
 95 |     dummy_arc.nextstate = start_state;
 96 | 
 97 |     cur_toks[start_state] = newToken(&dummy_arc, 0.0, NULL);
 98 | 
 99 |     num_frames_decoded = 0;
100 |     ProcessNonemitting();
101 | }
102 | 
103 | bool SimpleDecoder::Decode(P_Matrix feature, BaseFloat acoustic_scale)
104 | {
105 |     InitDecoding();
106 |     AdvanceDecoding(feature, acoustic_scale);
107 |     return (!cur_toks.empty());
108 | }
109 | 
110 | vector<int> SimpleDecoder::GetBestPath()
111 | {
112 |     Token* best_token;
113 |     BaseFloat best_cost = std::numeric_limits<double>::infinity();
114 | 
115 |     for(map<StateId, Token*>::iterator iter = cur_toks.begin();
116 |         iter != cur_toks.end(); ++iter)
117 |     {
118 |         if(best_cost > iter->second->cost)
119 |         {
120 |             best_cost = iter->second->cost;
121 |             best_token = iter->second;
122 |         }
123 |     }
124 | 
125 |     vector<int> result_rev;
126 |     Token* path = best_token;
127 |     while(path != NULL)
128 |     {
129 |         if(path->arc.olabel != 0)
130 |         {
131 |             result_rev.push_back(path->arc.olabel);
132 |         }
133 |         path = path->prev;
134 |     }
135 | 
136 |     vector<int> result;
137 |     for(int i=result_rev.size()-1; i>=0; i--)
138 |     {
139 |         result.push_back(result_rev[i]);
140 |     }
141 | 
142 |     return result;
143 | }
144 | 
145 | void SimpleDecoder::AdvanceDecoding(P_Matrix feature, BaseFloat acoustic_scale)
146 | {
147 |     while (num_frames_decoded < feature->rows)
148 |     {
149 |         // note: ProcessEmitting() increments num_frames_decoded_
150 |         ClearToks(prev_toks);
151 |         cur_toks.swap(prev_toks);
152 |         ProcessEmitting(feature, acoustic_scale);
153 |         ProcessNonemitting();
154 |         PruneToks(m_beam, &cur_toks);
155 |     }
156 | }
157 | 
158 | void SimpleDecoder::ProcessEmitting(P_Matrix feature, BaseFloat acoustic_scale)
159 | {
160 |     int32 frame = num_frames_decoded;
161 |     // Processes emitting arcs for one frame.  Propagates from
162 |     // prev_toks_ to cur_toks_.
163 |     double cutoff = numeric_limits<BaseFloat>::infinity();
164 |     for(map<StateId, Token*>::iterator iter = prev_toks.begin();
165 |        iter != prev_toks.end();
166 |        ++iter)
167 |     {
168 |         StateId state = iter->first;
169 |         Token *tok = iter->second;
170 |         for(int i=0; i<m_fst->state[state].arcNum; i++)
171 |         {
172 |             P_Arc arc = &m_fst->state[state].arc[i];
173 |             if(arc->ilabel != 0)
174 |             {
175 |                 // propagate..
176 |                 BaseFloat acoustic_cost = -acoustic_scale * LogLikelihood(feature, frame, arc->ilabel);
177 |                 double total_cost = tok->cost + arc->weight + acoustic_cost;
178 | 
179 |                 if(total_cost >= cutoff)
180 |                 {
181 |                     continue;
182 |                 }
183 |                 if(total_cost + m_beam < cutoff)
184 |                 {
185 |                     cutoff = total_cost + m_beam;
186 |                 }
187 | 
188 |                 Token *new_tok = newToken(arc, acoustic_cost, tok);
189 |                 map<StateId, Token*>::iterator find_iter = cur_toks.find(arc->nextstate);
190 |                 if(find_iter == cur_toks.end())
191 |                 {
192 |                     cur_toks[arc->nextstate] = new_tok;
193 |                 }
194 |                 else
195 |                 {
196 |                     if(find_iter->second->cost > new_tok->cost)
197 |                     {
198 |                         TokenDelete(find_iter->second);
199 |                         find_iter->second = new_tok;
200 |                     }
201 |                     else
202 |                     {
203 |                         TokenDelete(new_tok);
204 |                     }
205 |                }
206 |             }
207 |         }
208 |     }
209 |     num_frames_decoded++;
210 | }
211 | 
212 | void SimpleDecoder::ProcessNonemitting()
213 | {
214 |     // Processes nonemitting arcs for one frame.  Propagates within
215 |     // cur_toks_.
216 |     vector<StateId> queue;
217 |     double infinity = std::numeric_limits<double>::infinity();
218 |     double best_cost = infinity;
219 | 
220 |     for(map<StateId, Token*>::iterator iter = cur_toks.begin();
221 |         iter != cur_toks.end();
222 |         ++iter)
223 |     {
224 |         queue.push_back(iter->first);
225 |         best_cost = min(best_cost, iter->second->cost);
226 |     }
227 |     double cutoff = best_cost + m_beam;
228 | 
229 |     while(!queue.empty())
230 |     {
231 |         StateId state = queue.back();
232 |         queue.pop_back();
233 |         Token *tok = cur_toks[state];
234 |         for(int i=0; i<m_fst->state[state].arcNum; i++)
235 |         {
236 |             P_Arc arc = &m_fst->state[state].arc[i];
237 | 
238 |             if(arc->ilabel == 0)
239 |             {   // propagate nonemitting only...
240 |                 const BaseFloat acoustic_cost = 0.0;
241 |                 Token *new_tok = newToken(arc, acoustic_cost, tok);
242 |                 if(new_tok->cost > cutoff)
243 |                 {
244 |                     TokenDelete(new_tok);
245 |                 }
246 |                 else
247 |                 {
248 |                     map<StateId, Token*>::iterator find_iter = cur_toks.find(arc->nextstate);
249 |                     if(find_iter == cur_toks.end())
250 |                     {
251 |                         cur_toks[arc->nextstate] = new_tok;
252 |                         queue.push_back(arc->nextstate);
253 |                     }
254 |                     else
255 |                     {
256 |                         if(find_iter->second->cost > new_tok->cost)
257 |                         {
258 |                             TokenDelete(find_iter->second);
259 |                             find_iter->second = new_tok;
260 |                             queue.push_back(arc->nextstate);
261 |                         }
262 |                         else
263 |                         {
264 |                             TokenDelete(new_tok);
265 |                         }
266 |                     }
267 |                 }
268 |             }
269 |         }
270 |     }
271 | }
272 | 
273 | static const BaseFloat kMinLogDiffFloat = logf(FLT_EPSILON);
274 | 
275 | BaseFloat LogSumExp(vector<BaseFloat> input, BaseFloat prune)
276 | {
277 |     BaseFloat max_elem = input[0];
278 |     for(int i=1; i<input.size(); i++)
279 |     {
280 |         if(max_elem < input[i])
281 |         {
282 |             max_elem = input[i];
283 |         }
284 |     }
285 |     BaseFloat cutoff;
286 |     cutoff = max_elem + kMinLogDiffFloat;
287 |     if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning...
288 |     {
289 |         cutoff = max_elem - prune;
290 |     }
291 | 
292 |     double sum_relto_max_elem = 0.0;
293 | 
294 |     for(int i = 0; i < input.size(); i++)
295 |     {
296 |         BaseFloat f = input[i];
297 |         if (f >= cutoff)
298 |         {
299 |             sum_relto_max_elem += expf(f - max_elem);
300 |         }
301 |     }
302 |     return max_elem + logf(sum_relto_max_elem);
303 | }
304 | 
305 | BaseFloat SimpleDecoder::LogLikelihood(P_Matrix feature, int32 frame, int32 tid)
306 | {
307 |     int32 state = m_transmodel->TransitionIdToPdf(tid);
308 | 
309 |     vector<BaseFloat> data;
310 |     vector<BaseFloat> data_squared;
311 |     for(int i=0; i<feature->cols; i++)
312 |     {
313 |         BaseFloat v = ReadMatrix(feature, frame, i);
314 |         data.push_back(v);
315 |         data_squared.push_back(v*v);
316 |     }
317 | 
318 |     DiagGmm& pdf = m_amgmm->GetPdf(state);
319 | 
320 |     vector<BaseFloat> loglikes;
321 |     for(int i=0; i<pdf.gconsts.size(); i++)
322 |     {
323 |         loglikes.push_back(pdf.gconsts[i]);
324 |     }
325 | 
326 |     for(int i=0; i<loglikes.size(); i++)
327 |     {
328 |         BaseFloat sum = 0.0;
329 |         for(int j=0; j<pdf.means_invvars.cols; j++)
330 |         {
331 |             sum += 1.0 * data[j] * ReadMatrix(&pdf.means_invvars, i, j);
332 |             sum += -0.5 * data_squared[j] * ReadMatrix(&pdf.inv_vars, i, j);
333 |         }
334 |         loglikes[i] += sum;
335 |     }
336 | 
337 |     BaseFloat log_sum_exp_prune = -1;
338 | 
339 |     return LogSumExp(loglikes, log_sum_exp_prune);
340 | }
341 | 
342 | void SimpleDecoder::ClearToks(map<StateId, Token*> &toks) {
343 |     for(map<StateId, Token*>::iterator iter = toks.begin();
344 |         iter != toks.end(); ++iter)
345 |     {
346 |         TokenDelete(iter->second);
347 |     }
348 |     toks.clear();
349 | }
350 | 
351 | void SimpleDecoder::PruneToks(BaseFloat beam, map<StateId, Token*> *toks)
352 | {
353 |     if(toks->empty())
354 |     {
355 |         printf("No tokens to prune.\n");
356 |         return;
357 |     }
358 |     double best_cost = numeric_limits<double>::infinity();
359 |     for(map<StateId, Token*>::iterator iter = toks->begin();
360 |        iter != toks->end(); ++iter)
361 |     {
362 |         best_cost = min(best_cost, iter->second->cost);
363 |     }
364 | 
365 |     vector<StateId> retained;
366 |     double cutoff = best_cost + beam;
367 |     for(map<StateId, Token*>::iterator iter = toks->begin();
368 |        iter != toks->end(); ++iter)
369 |     {
370 |         if(iter->second->cost < cutoff)
371 |         {
372 |             retained.push_back(iter->first);
373 |         }
374 |         else
375 |         {
376 |             TokenDelete(iter->second);
377 |         }
378 |     }
379 |     map<StateId, Token*> tmp;
380 |     for (size_t i = 0; i < retained.size(); i++)
381 |     {
382 |         tmp[retained[i]] = (*toks)[retained[i]];
383 |     }
384 |     printf("Pruned to %lu toks.\n", retained.size());
385 |     tmp.swap(*toks);
386 | }


--------------------------------------------------------------------------------
/src/srfft.cpp:
--------------------------------------------------------------------------------
  1 | #include "srfft.h"
  2 | 
  3 | SplitRadixComplexFft::SplitRadixComplexFft(int32 N)
  4 | {
  5 |     N_ = N;
  6 |     logn_ = 0;
  7 |     while (N > 1) {
  8 |       N >>= 1;
  9 |       logn_ ++;
 10 |     }
 11 |     ComputeTables();
 12 | }
 13 | 
 14 | SplitRadixComplexFft::~SplitRadixComplexFft()
 15 | {
 16 |     delete [] brseed_;
 17 |     if (tab_ != NULL) {
 18 |         for (int32 i = 0; i < logn_-3; i++)
 19 |         {
 20 |             delete [] tab_[i];
 21 |         }
 22 |         delete [] tab_;
 23 |     }
 24 | }
 25 | 
 26 | void SplitRadixComplexFft::Compute(BaseFloat *xr, BaseFloat *xi, bool forward) const {
 27 |   if (!forward) {  // reverse real and imaginary parts for complex FFT.
 28 |     BaseFloat *tmp = xr;
 29 |     xr = xi;
 30 |     xi = tmp;
 31 |   }
 32 |   ComputeRecursive(xr, xi, logn_);
 33 |   if (logn_ > 1) {
 34 |     BitReversePermute(xr, logn_);
 35 |     BitReversePermute(xi, logn_);
 36 |   }
 37 | }
 38 | 
 39 | void SplitRadixComplexFft::Compute(BaseFloat *x, bool forward,
 40 |                                          std::vector<BaseFloat> *temp_buffer) const {
 41 |   if (temp_buffer->size() != N_)
 42 |     temp_buffer->resize(N_);
 43 |   BaseFloat *temp_ptr = &((*temp_buffer)[0]);
 44 |   for (int32 i = 0; i < N_; i++) {
 45 |     x[i] = x[i * 2];  // put the real part in the first half of x.
 46 |     temp_ptr[i] = x[i * 2 + 1];  // put the imaginary part in temp_buffer.
 47 |   }
 48 |   // copy the imaginary part back to the second half of x.
 49 |   memcpy(static_cast<void*>(x + N_),
 50 |          static_cast<void*>(temp_ptr),
 51 |          sizeof(BaseFloat) * N_);
 52 | 
 53 |   Compute(x, x + N_, forward);
 54 |   // Now change the format back to interleaved.
 55 |   memcpy(static_cast<void*>(temp_ptr),
 56 |          static_cast<void*>(x + N_),
 57 |          sizeof(BaseFloat) * N_);
 58 |   for (int32 i = N_-1; i > 0; i--) {  // don't include 0,
 59 |     // in case MatrixIndexT is unsigned, the loop would not terminate.
 60 |     // Treat it as a special case.
 61 |     x[i*2] = x[i];
 62 |     x[i*2 + 1] = temp_ptr[i];
 63 |   }
 64 |   x[1] = temp_ptr[0];  // special case of i = 0.
 65 | }
 66 | 
 67 | void SplitRadixComplexFft::Compute(BaseFloat *x, bool forward) {
 68 |   this->Compute(x, forward, &temp_buffer_);
 69 | }
 70 | 
 71 | void SplitRadixComplexFft::BitReversePermute(BaseFloat *x, int32 logn) const {
 72 |   int32      i, j, lg2, n;
 73 |   int32      off, fj, gno, *brp;
 74 |   BaseFloat    tmp, *xp, *xq;
 75 | 
 76 |   lg2 = logn >> 1;
 77 |   n = 1 << lg2;
 78 |   if (logn & 1) lg2++;
 79 | 
 80 |   /* Unshuffling loop */
 81 |   for (off = 1; off < n; off++) {
 82 |     fj = n * brseed_[off]; i = off; j = fj;
 83 |     tmp = x[i]; x[i] = x[j]; x[j] = tmp;
 84 |     xp = &x[i];
 85 |     brp = &(brseed_[1]);
 86 |     for (gno = 1; gno < brseed_[off]; gno++) {
 87 |       xp += n;
 88 |       j = fj + *brp++;
 89 |       xq = x + j;
 90 |       tmp = *xp; *xp = *xq; *xq = tmp;
 91 |     }
 92 |   }
 93 | }
 94 | 
// Recursive core of the FFT, operating in place on 2^logn values whose real
// parts are in xr and imaginary parts in xi.
//
// Lengths 1, 2 and 4 (logn 0, 1, 2) are computed directly. For longer inputs
// the routine performs the butterfly steps below and then recurses once on the
// first half (logn-1) and once on each of the two remaining quarters (logn-2).
// The caller (Compute) applies BitReversePermute() to both arrays afterwards.
//
// For logn >= 4 the rotation coefficients are read from tab_[logn-4], which
// holds six consecutive sub-tables of m/4 - 2 entries each (see
// ComputeTables()). For logn == 3 the only loop iteration is n == m8, which
// uses sqhalf and never touches the (null) table pointers.
void SplitRadixComplexFft::ComputeRecursive(BaseFloat *xr, BaseFloat *xi, int32 logn) const {

  int32    m, m2, m4, m8, nel, n;
  BaseFloat    *xr1, *xr2, *xi1, *xi2;
  BaseFloat    *cn = nullptr, *spcn = nullptr, *smcn = nullptr, *c3n = nullptr,
    *spc3n = nullptr, *smc3n = nullptr;
  BaseFloat    tmp1, tmp2;
  BaseFloat   sqhalf = M_SQRT1_2;

  /* Compute trivial cases */
  if (logn < 3) {
    if (logn == 2) {  /* length m = 4 */
      xr2  = xr + 2;
      xi2  = xi + 2;
      tmp1 = *xr + *xr2;
      *xr2 = *xr - *xr2;
      *xr  = tmp1;
      tmp1 = *xi + *xi2;
      *xi2 = *xi - *xi2;
      *xi  = tmp1;
      xr1  = xr + 1;
      xi1  = xi + 1;
      xr2++;
      xi2++;
      tmp1 = *xr1 + *xr2;
      *xr2 = *xr1 - *xr2;
      *xr1 = tmp1;
      tmp1 = *xi1 + *xi2;
      *xi2 = *xi1 - *xi2;
      *xi1 = tmp1;
      xr2  = xr + 1;
      xi2  = xi + 1;
      tmp1 = *xr + *xr2;
      *xr2 = *xr - *xr2;
      *xr  = tmp1;
      tmp1 = *xi + *xi2;
      *xi2 = *xi - *xi2;
      *xi  = tmp1;
      xr1  = xr + 2;
      xi1  = xi + 2;
      xr2  = xr + 3;
      xi2  = xi + 3;
      tmp1 = *xr1 + *xi2;
      tmp2 = *xi1 + *xr2;
      *xi1 = *xi1 - *xr2;
      *xr2 = *xr1 - *xi2;
      *xr1 = tmp1;
      *xi2 = tmp2;
      return;
    }
    else if (logn == 1) {   /* length m = 2 */
      xr2  = xr + 1;
      xi2  = xi + 1;
      tmp1 = *xr + *xr2;
      *xr2 = *xr - *xr2;
      *xr  = tmp1;
      tmp1 = *xi + *xi2;
      *xi2 = *xi - *xi2;
      *xi  = tmp1;
      return;
    }
    else if (logn == 0) return;   /* length m = 1 */
  }

  /* Compute a few constants: m = transform length, m2 = m/2, m4 = m/4, m8 = m/8 */
  m = 1 << logn; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;


  /* Step 1: sum/difference butterflies between the first and second half */
  xr1 = xr; xr2 = xr1 + m2;
  xi1 = xi; xi2 = xi1 + m2;
  for (n = 0; n < m2; n++) {
    tmp1 = *xr1 + *xr2;
    *xr2 = *xr1 - *xr2;
    xr2++;
    *xr1++ = tmp1;
    tmp2 = *xi1 + *xi2;
    *xi2 = *xi1 - *xi2;
    xi2++;
    *xi1++ = tmp2;
  }

  /* Step 2: combine the third and fourth quarter, mixing real and imaginary parts */
  xr1 = xr + m2; xr2 = xr1 + m4;
  xi1 = xi + m2; xi2 = xi1 + m4;
  for (n = 0; n < m4; n++) {
    tmp1 = *xr1 + *xi2;
    tmp2 = *xi1 + *xr2;
    *xi1 = *xi1 - *xr2;
    xi1++;
    *xr2++ = *xr1 - *xi2;
    *xr1++ = tmp1;
    *xi2++ = tmp2;
    // xr1++; xr2++; xi1++; xi2++;
  }

  /* Steps 3 & 4: multiply the third and fourth quarter by the tabulated coefficients */
  xr1 = xr + m2; xr2 = xr1 + m4;
  xi1 = xi + m2; xi2 = xi1 + m4;
  if (logn >= 4) {
    // Six consecutive sub-tables of nel entries each inside tab_[logn-4].
    nel = m4 - 2;
    cn  = tab_[logn-4]; spcn  = cn + nel;  smcn  = spcn + nel;
    c3n = smcn + nel;  spc3n = c3n + nel; smc3n = spc3n + nel;
  }
  // Element n == 0 is skipped: the loop below starts at n = 1.
  xr1++; xr2++; xi1++; xi2++;
  // xr1++; xi1++;
  for (n = 1; n < m4; n++) {
    if (n == m8) {
      // This index has no table entry; sqrt(1/2) is used directly.
      tmp1 =  sqhalf * (*xr1 + *xi1);
      *xi1 =  sqhalf * (*xi1 - *xr1);
      *xr1 =  tmp1;
      tmp2 =  sqhalf * (*xi2 - *xr2);
      *xi2 = -sqhalf * (*xr2 + *xi2);
      *xr2 =  tmp2;
    } else {
      tmp2 = *cn++ * (*xr1 + *xi1);
      tmp1 = *spcn++ * *xr1 + tmp2;
      *xr1 = *smcn++ * *xi1 + tmp2;
      *xi1 = tmp1;
      tmp2 = *c3n++ * (*xr2 + *xi2);
      tmp1 = *spc3n++ * *xr2 + tmp2;
      *xr2 = *smc3n++ * *xi2 + tmp2;
      *xi2 = tmp1;
    }
    xr1++; xr2++; xi1++; xi2++;
  }

  /* Recurse on the first half with half the DFT length */
  ComputeRecursive(xr, xi, logn-1);

  /* Recurse on each of the two remaining quarters with one quarter of the
     DFT length. m and m2 still hold the values computed above. */
  // m = 1 << logn; m2 = m / 2;
  ComputeRecursive(xr + m2, xi + m2, logn - 2);
  // m = 1 << logn;
  m4 = 3 * (m / 4);  // m4 is reused here as the offset of the last quarter
  ComputeRecursive(xr + m4, xi + m4, logn - 2);
}
233 | 
234 | void SplitRadixComplexFft::ComputeTables() {
235 |   int32    imax, lg2, i, j;
236 |   int32     m, m2, m4, m8, nel, n;
237 |   BaseFloat    *cn, *spcn, *smcn, *c3n, *spc3n, *smc3n;
238 |   BaseFloat    ang, c, s;
239 | 
240 |   lg2 = logn_ >> 1;
241 |   if (logn_ & 1) lg2++;
242 |   brseed_ = new int32[1 << lg2];
243 |   brseed_[0] = 0;
244 |   brseed_[1] = 1;
245 |   for (j = 2; j <= lg2; j++) {
246 |     imax = 1 << (j - 1);
247 |     for (i = 0; i < imax; i++) {
248 |       brseed_[i] <<= 1;
249 |       brseed_[i + imax] = brseed_[i] + 1;
250 |     }
251 |   }
252 | 
253 |   if (logn_ < 4) {
254 |     tab_ = NULL;
255 |   } else {
256 |     tab_ = new BaseFloat* [logn_-3];
257 |     for (i = logn_; i>=4 ; i--) {
258 |       /* Compute a few constants */
259 |       m = 1 << i; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
260 | 
261 |       /* Allocate memory for tables */
262 |       nel = m4 - 2;
263 | 
264 |       tab_[i-4] = new BaseFloat[6*nel];
265 | 
266 |       /* Initialize pointers */
267 |       cn = tab_[i-4]; spcn  = cn + nel;  smcn  = spcn + nel;
268 |       c3n = smcn + nel;  spc3n = c3n + nel; smc3n = spc3n + nel;
269 | 
270 |       /* Compute tables */
271 |       for (n = 1; n < m4; n++) {
272 |         if (n == m8) continue;
273 |         ang = n * M_2PI / m;
274 |         c = cos(ang); s = sin(ang);
275 |         *cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
276 |         ang = 3 * n * M_2PI / m;
277 |         c = cos(ang); s = sin(ang);
278 |         *c3n++ = c; *spc3n++ = - (s + c); *smc3n++ = s - c;
279 |       }
280 |     }
281 |   }
282 | }
283 | 
284 | inline void ComplexMul(const BaseFloat &a_re, const BaseFloat &a_im,
285 |                                             BaseFloat *b_re, BaseFloat *b_im) {
286 |   BaseFloat tmp_re = (*b_re * a_re) - (*b_im * a_im);
287 |   *b_im = *b_re * a_im + *b_im * a_re;
288 |   *b_re = tmp_re;
289 | }
290 | 
291 | inline void ComplexAddProduct(const BaseFloat &a_re, const BaseFloat &a_im,
292 |                                                    const BaseFloat &b_re, const BaseFloat &b_im,
293 |                                                    BaseFloat *c_re, BaseFloat *c_im) {
294 |   *c_re += b_re*a_re - b_im*a_im;
295 |   *c_im += b_re*a_im + b_im*a_re;
296 | }
297 | 
298 | 
299 | inline void ComplexImExp(BaseFloat x, BaseFloat *a_re, BaseFloat *a_im) {
300 |   *a_re = cos(x);
301 |   *a_im = sin(x);
302 | }
303 | 
304 | void SplitRadixRealFft::Compute(BaseFloat *data, bool forward) {
305 |   Compute(data, forward, &this->temp_buffer_);
306 | }
307 | 
308 | 
// This code is mostly the same as the RealFft function.  It would be
// possible to replace it with more efficient code from Rico's book.
//
// In-place FFT of N_ values in `data`, built on the complex FFT of the base
// class.
//   forward == true : the base-class complex FFT runs first, then the loop
//                     below combines the bins k and N/2 - k.
//   forward == false: the combination step runs first, then the base-class
//                     inverse FFT, and finally every element is doubled.
// `temp_buffer` is passed through to the base-class Compute() as scratch space.
void SplitRadixRealFft::Compute(BaseFloat *data, bool forward,
                                      std::vector<BaseFloat> *temp_buffer) const {
  int32 N = N_, N2 = N/2;

  if (forward) // call to base class
    SplitRadixComplexFft::Compute(data, true, temp_buffer);

  BaseFloat rootN_re, rootN_im;  // exp(-2pi/N), forward; exp(2pi/N), backward
  int forward_sign = forward ? -1 : 1;
  ComplexImExp(static_cast<BaseFloat>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
  BaseFloat kN_re = -forward_sign, kN_im = 0.0;  // exp(-2pik/N), forward; exp(-2pik/N), backward
  // kN starts out as 1.0 for forward algorithm but -1.0 for backward.
  for (int32 k = 1; 2*k <= N2; k++) {
    // Advance kN by one step of the root computed above.
    ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);

    BaseFloat Ck_re, Ck_im, Dk_re, Dk_im;
    // C_k = 1/2 (B_k + B_{N/2 - k}^*) :
    Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
    Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
    // re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
    Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
    // im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
    Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
    // A_k = C_k + 1^(k/N) D_k:
    data[2*k] = Ck_re;  // A_k <-- C_k
    data[2*k+1] = Ck_im;
    // now A_k += D_k 1^(k/N)
    ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));

    int32 kdash = N2 - k;
    if (kdash != k) {
      // Next we handle the index k' = N/2 - k.  This is necessary
      // to do now, to avoid invalidating data that we will later need.
      // The quantities C_{k'} and D_{k'} are just the conjugates of C_k
      // and D_k, so the equations are simple modifications of the above,
      // replacing Ck_im and Dk_im with their negatives.
      data[2*kdash] = Ck_re;  // A_k' <-- C_k'
      data[2*kdash+1] = -Ck_im;
      // now A_k' += D_k' 1^(k'/N)
      // We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
      // so it's the same as 1^(k/N) but with the real part negated.
      ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
    }
  }

  {  // Now handle k = 0.
    // In simple terms: after the complex fft, data[0] becomes the sum of real
    // parts input[0], input[2]... and data[1] becomes the sum of imaginary
    // parts input[1], input[3]...
    // "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
    // and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
    BaseFloat zeroth = data[0] + data[1],
        n2th = data[0] - data[1];
    data[0] = zeroth;
    data[1] = n2th;
    if (!forward) {
      data[0] /= 2;
      data[1] /= 2;
    }
  }
  if (!forward) {  // call to base class
    SplitRadixComplexFft::Compute(data, false, temp_buffer);
    for (int32 i = 0; i < N; i++)
      data[i] *= 2.0;
    // This is so we get a factor of N increase, rather than N/2 which we would
    // otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
    // It's for consistency with our normal FFT conventions.
  }
}
380 | 


--------------------------------------------------------------------------------
/src/transition-model.cpp:
--------------------------------------------------------------------------------
#include "transition-model.h"

#include <stdexcept>
  2 | 
// Section tags compared against the tokens read from the model file:
// `transmodel` and the tuple/triple tags are checked in TransitionModel::Read(),
// `topology` in TransitionModel::ReadTopo().
const char* transmodel = "<TransitionModel>";
const char* topology = "<Topology>";
const char* tuplesName = "<Tuples>";
const char* triplesName = "<Triples>";
  7 | 
  8 | void TransitionModel::Read(FILE* fp)
  9 | {
 10 |     char token[128];
 11 |     ReadToken(fp, token);
 12 |     if(strcmp(transmodel, token) != 0)
 13 |     {
 14 |         printf("Model file type error!\n");
 15 |         return;
 16 |     }
 17 | 
 18 |     ReadTopo(fp);
 19 | 
 20 |     //Read tuples
 21 |     ReadToken(fp, token);
 22 |     int32 size;
 23 |     ReadBasicType(fp, &size);
 24 |     tuples.resize(size);
 25 |     for (int32 i = 0; i < size; i++)
 26 |     {
 27 |         ReadBasicType(fp, &(tuples[i].phone));
 28 |         ReadBasicType(fp, &(tuples[i].hmm_state));
 29 |         ReadBasicType(fp, &(tuples[i].forward_pdf));
 30 |         if (0 == strcmp(token, tuplesName))
 31 |         {
 32 |             ReadBasicType(fp, &(tuples[i].self_loop_pdf));
 33 |         }
 34 |         else if (0 == strcmp(token, triplesName))
 35 |         {
 36 |             tuples[i].self_loop_pdf = tuples[i].forward_pdf;
 37 |         }
 38 |     }
 39 |     ReadToken(fp, token);
 40 |     //TODO: Check token is </Triples> or </Tuples>
 41 |     ComputeDerived();
 42 |     ReadToken(fp, token); //<LogProbs>
 43 |     ReadFloatVectors(fp, &log_probs);
 44 |     ReadToken(fp, token); //</LogProbs>
 45 |     ReadToken(fp, token); //</TransitionModel>
 46 |     ComputeDerivedOfProbs();
 47 |     //TODO: Check
 48 | }
 49 | 
 50 | int32 TransitionModel::TransitionIdToPdf(int32 trans_id) const
 51 | {
 52 |     return id2pdf_id[trans_id];
 53 | }
 54 | 
 55 | void TransitionModel::ReadTopo(FILE *fp)
 56 | {
 57 |     char token[128];
 58 |     ReadToken(fp, token);
 59 |     if(strcmp(topology, token) != 0)
 60 |     {
 61 |         printf("Topology file type error!\n");
 62 |         return;
 63 |     }
 64 | 
 65 |     ReadIntegerVector(fp, &topo.phones);
 66 |     ReadIntegerVector(fp, &topo.phone2idx);
 67 | 
 68 |     //Read Tuples
 69 |     int32 size;
 70 |     ReadBasicType(fp, &size);
 71 |     bool is_hmm = true;
 72 |     topo.entries.resize(size);
 73 |     for (int32 i = 0; i < size; i++)
 74 |     {
 75 |         int32 thist_sz;
 76 |         ReadBasicType(fp, &thist_sz);
 77 |         topo.entries[i].resize(thist_sz);
 78 |         for (int32 j = 0 ; j < thist_sz; j++)
 79 |         {
 80 |             ReadBasicType(fp, &(topo.entries[i][j].forward_pdf_class));
 81 |             if(is_hmm)
 82 |             {
 83 |                 topo.entries[i][j].self_loop_pdf_class = topo.entries[i][j].forward_pdf_class;
 84 |             }
 85 |             else
 86 |             {
 87 |                 ReadBasicType(fp, &(topo.entries[i][j].self_loop_pdf_class));
 88 |             }
 89 |             int32 thiss_sz;
 90 |             ReadBasicType(fp, &thiss_sz);
 91 |             topo.entries[i][j].transitions.resize(thiss_sz);
 92 |             for (int32 k = 0; k < thiss_sz; k++)
 93 |             {
 94 |                 ReadBasicType(fp, &(topo.entries[i][j].transitions[k].first));
 95 |                 ReadBasicType(fp, &(topo.entries[i][j].transitions[k].second));
 96 |             }
 97 |         }
 98 |     }
 99 |     ReadToken(fp, token);
100 |     //TODO: Add check
101 | }
102 | 
103 | void TransitionModel::ComputeDerived()
104 | {
105 |     state2id.resize(tuples.size()+2);  // indexed by transition-state, which
106 |     // is one based, but also an entry for one past end of list.
107 | 
108 |     int32 cur_transition_id = 1;
109 |     num_pdfs = 0;
110 |     for (int32 tstate = 1;
111 |         tstate <= static_cast<int32>(tuples.size()+1);  // not a typo.
112 |         tstate++)
113 |     {
114 |         state2id[tstate] = cur_transition_id;
115 |         if (static_cast<size_t>(tstate) <= tuples.size())
116 |         {
117 |           int32 phone = tuples[tstate-1].phone,
118 |               hmm_state = tuples[tstate-1].hmm_state,
119 |               forward_pdf = tuples[tstate-1].forward_pdf,
120 |               self_loop_pdf = tuples[tstate-1].self_loop_pdf;
121 |           num_pdfs = max(num_pdfs, 1 + forward_pdf);
122 |           num_pdfs = max(num_pdfs, 1 + self_loop_pdf);
123 |           const HmmState &state = TopologyForPhone(phone)[hmm_state];
124 |           int32 my_num_ids = static_cast<int32>(state.transitions.size());
125 |           cur_transition_id += my_num_ids;  // # trans out of this state.
126 |         }
127 |     }
128 | 
129 |     id2state.resize(cur_transition_id);   // cur_transition_id is #transition-ids+1.
130 |     id2pdf_id.resize(cur_transition_id);
131 |     for (int32 tstate = 1; tstate <= static_cast<int32>(tuples.size()); tstate++)
132 |     {
133 |         for (int32 tid = state2id[tstate]; tid < state2id[tstate+1]; tid++)
134 |         {
135 |             id2state[tid] = tstate;
136 |             if (IsSelfLoop(tid))
137 |             {
138 |                 id2pdf_id[tid] = tuples[tstate-1].self_loop_pdf;
139 |             }
140 |             else
141 |             {
142 |                 id2pdf_id[tid] = tuples[tstate-1].forward_pdf;
143 |             }
144 |         }
145 |     }
146 | 
147 |     // The following statements put copies a large number in the region of memory
148 |     // past the end of the id2pdf_id_ array, while leaving the array as it was
149 |     // before.  The goal of this is to speed up decoding by disabling a check
150 |     // inside TransitionIdToPdf() that the transition-id was within the correct
151 |     // range.
152 |     int32 num_big_numbers = min<int32>(2000, cur_transition_id);
153 |     id2pdf_id.resize(cur_transition_id + num_big_numbers,
154 |                       std::numeric_limits<int32>::max());
155 |     id2pdf_id.resize(cur_transition_id);
156 | }
157 | 
158 | bool TransitionModel::IsSelfLoop(int32 trans_id) const {
159 |     int32 trans_state = id2state[trans_id];
160 |     int32 trans_index = trans_id - state2id[trans_state];
161 |     const Tuple &tuple = tuples[trans_state-1];
162 |     int32 phone = tuple.phone, hmm_state = tuple.hmm_state;
163 |     const TopologyEntry &entry = TopologyForPhone(phone);
164 |     return (static_cast<size_t>(trans_index) < entry[hmm_state].transitions.size()
165 |             && entry[hmm_state].transitions[trans_index].first == hmm_state);
166 | }
167 | 
168 | const TopologyEntry& TransitionModel::TopologyForPhone(int32 phone) const
169 | {
170 |     // Will throw if phone not covered.
171 |     if (static_cast<size_t>(phone) >= topo.phone2idx.size() || topo.phone2idx[phone] == -1) {
172 |         printf("TopologyForPhone(), phone %d not covered.\n", phone);
173 |     }
174 |     return topo.entries[topo.phone2idx[phone]];
175 | }
176 | 
177 | void TransitionModel::ComputeDerivedOfProbs()
178 | {
179 |     non_self_loop_log_probs.resize(NumTransitionStates()+1);  // this array indexed
180 |     //  by transition-state with nothing in zeroth element.
181 |     for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++)
182 |     {
183 |         int32 tid = SelfLoopOf(tstate);
184 |         if (tid == 0)
185 |         {   // no self-loop
186 |             non_self_loop_log_probs[tstate] = 0.0;  // log(1.0)
187 |         }
188 |         else
189 |         {
190 |             BaseFloat self_loop_prob = expf(GetTransitionLogProb(tid)),
191 |                   non_self_loop_prob = 1.0 - self_loop_prob;
192 |             if (non_self_loop_prob <= 0.0)
193 |             {
194 |                 printf("ComputeDerivedOfProbs(): non-self-loop prob is %f\n", non_self_loop_prob);
195 |                 non_self_loop_prob = 1.0e-10;  // just so we can continue...
196 |             }
197 |             non_self_loop_log_probs[tstate] = logf(non_self_loop_prob);  // will be negative.
198 |         }
199 |     }
200 | }
201 | 
202 | int32 TransitionModel::SelfLoopOf(int32 trans_state) const
203 | {   // returns the self-loop transition-id
204 |     const Tuple &tuple = tuples[trans_state-1];
205 |     // or zero if does not exist.
206 |     int32 phone = tuple.phone, hmm_state = tuple.hmm_state;
207 |     const TopologyEntry &entry = TopologyForPhone(phone);
208 | 
209 |     for (int32 trans_index = 0;
210 |         trans_index < static_cast<int32>(entry[hmm_state].transitions.size());
211 |         trans_index++)
212 |     {
213 |         if (entry[hmm_state].transitions[trans_index].first == hmm_state)
214 |         {
215 |             return PairToTransitionId(trans_state, trans_index);
216 |         }
217 |     }
218 | 
219 |     return 0;  // invalid transition id.
220 | }
221 | 
222 | BaseFloat TransitionModel::GetTransitionLogProb(int32 trans_id) const
223 | {
224 |   return log_probs[trans_id];
225 | }
226 | 
227 | int32 TransitionModel::PairToTransitionId(int32 trans_state, int32 trans_index) const
228 | {
229 |     return state2id[trans_state] + trans_index;
230 | }
231 | 
232 | int32 TransitionModel::NumTransitionStates()
233 | {
234 |     return tuples.size();
235 | }


--------------------------------------------------------------------------------
/src/wavereader.cpp:
--------------------------------------------------------------------------------
 1 | #include "wavereader.h"
 2 | 
 3 | WaveReader::WaveReader()
 4 | {
 5 |     memset(&m_wavefile, 0, sizeof(WaveFile));
 6 | }
 7 | 
 8 | WaveReader::~WaveReader()
 9 | {
10 |     m_wavefile.data.clear();
11 |     m_waveData.clear();
12 | }
13 | 
14 | void WaveReader::ReadWaveFile(const char* fileName)
15 | {
16 |     FILE *fp = fopen(fileName, "rb");
17 | 
18 |     if(!fp)
19 |     {
20 |         printf("Open wave file %s error\n", fileName);
21 |     }
22 | 
23 |     //读取文件头
24 |     fread(&m_wavefile, sizeof(WaveHeader), 1, fp);
25 | 
26 |     //读取数据
27 |     int dataSize = m_wavefile.header.subchunk2_size;
28 |     m_wavefile.data.resize(dataSize/2);
29 |     fread(m_wavefile.data.data(), dataSize, 1, fp);
30 | 
31 |     m_waveData.clear();
32 |     for(int i=0; i<dataSize/2; i++)
33 |     {
34 |         m_waveData.push_back(static_cast<BaseFloat>(m_wavefile.data[i]));
35 |     }
36 | 
37 |     fclose(fp);
38 | }


--------------------------------------------------------------------------------