├── CMakeLists.txt
├── example
    ├── data
    │   ├── ali.ark
    │   ├── ali.ark.txt
    │   ├── ali.scp
    │   ├── matrix-comp1.ark
    │   ├── matrix-comp1.scp
    │   ├── matrix-comp2.ark
    │   ├── matrix-comp2.scp
    │   ├── matrix-comp3.ark
    │   ├── matrix-comp3.scp
    │   ├── matrix-comp4.ark
    │   ├── matrix-comp4.scp
    │   ├── matrix-comp5.ark
    │   ├── matrix-comp5.scp
    │   ├── matrix-comp6.ark
    │   ├── matrix-comp6.scp
    │   ├── matrix-comp7.ark
    │   ├── matrix-comp7.scp
    │   ├── matrix.ark.txt
    │   ├── matrix.nocompress.ark
    │   ├── matrix.nocompress.scp
    │   ├── matrix.scp
    │   ├── post.ark
    │   ├── post.ark.txt
    │   ├── post.scp
    │   └── ref.txt
    ├── read-ali.py
    ├── read-compressed-matrix.py
    ├── read-matrix.py
    ├── read-post.py
    └── read-uncompressed-matrix.py
├── kaldi-ali.cc
├── kaldi-matrix-direct.cc
├── kaldi-matrix.cc
├── readme.md
├── shape-funcs.cc
└── shape-funcs.hh


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.8)
 2 | project(kaldi_reader_standalone)
 3 | 
 4 | set(CMAKE_CXX_STANDARD 11)
 5 | if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
 6 |     # c++ regex is used in the code, so the version of gcc must be greater than 4.9
 7 |     message(FATAL_ERROR "VERSION OF GCC MUST BE GREATER THAN 5.1")
 8 | endif()
 9 | 
10 | set(PYTHONBIN "python" CACHE PATH "Path of python with tensorflow installed")
11 | 
12 | execute_process(
13 |         COMMAND ${PYTHONBIN} -c "import tensorflow as tf; print(tf.sysconfig.get_include())"
14 |         OUTPUT_VARIABLE DEFAULT_TF_INC
15 |         ERROR_VARIABLE ERROR_TF_INC
16 |         RESULT_VARIABLE RESULT_TF_INC
17 |         OUTPUT_STRIP_TRAILING_WHITESPACE
18 | )
19 | execute_process(
20 |         COMMAND ${PYTHONBIN} -c "import tensorflow as tf; print(tf.sysconfig.get_lib())"
21 |         OUTPUT_VARIABLE DEFAULT_TF_LIB
22 |         ERROR_VARIABLE ERROR_TF_LIB
23 |         RESULT_VARIABLE RESULT_TF_LIB
24 |         OUTPUT_STRIP_TRAILING_WHITESPACE
25 | )
26 | 
27 | execute_process(
28 |   COMMAND ${PYTHONBIN} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))"
29 |   OUTPUT_VARIABLE DEFAULT_TF_CXX_FLAGS
30 |   ERROR_VARIABLE ERROR_TF_CXX_FLAGS
31 |   RESULT_VARIABLE RESULT_TF_CXX_FLAGS
32 |   OUTPUT_STRIP_TRAILING_WHITESPACE
33 | )
34 | execute_process(
35 |   COMMAND ${PYTHONBIN} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))"
36 |   OUTPUT_VARIABLE DEFAULT_TF_LINK_FLAGS
37 |   ERROR_VARIABLE ERROR_TF_LINK_FLAGS
38 |   RESULT_VARIABLE RESULT_TF_LINK_FLAGS
39 |   OUTPUT_STRIP_TRAILING_WHITESPACE
40 | )
41 | set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${DEFAULT_TF_CXX_FLAGS} -std=c++11" )
42 | set( CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DEFAULT_TF_LINK_FLAGS}" )
43 | #message("TF_INC is set: ${DEFAULT_TF_INC}")
44 | #message("TF_LIB is set: ${DEFAULT_TF_LIB}")
45 | #set(TF_INC "${DEFAULT_TF_INC}" CACHE PATH "Path of tensorflow including files")
46 | #set(TF_LIB "${DEFAULT_TF_LIB}" CACHE PATH "Path of tensorflow linking libraries")
47 | 
48 | set(TF_INC "${DEFAULT_TF_INC}")
49 | set(TF_LIB "${DEFAULT_TF_LIB}")
50 | 
51 | if ("${TF_INC}" STREQUAL "" OR "${TF_LIB}" STREQUAL "")
52 |     message(FATAL_ERROR "TF_INC and TF_LIB not set. Please set both variable manually, or set correct PYTHONBIN var.")
53 | endif()
54 | 
55 | 
56 | message("TF_INC is set: ${TF_INC}")
57 | message("TF_LIB is set: ${TF_LIB}")
58 | 
59 | set(SOURCE_FILES
60 |         kaldi-matrix.cc
61 |         kaldi-matrix-direct.cc
62 |         kaldi-ali.cc
63 |         shape-funcs.cc
64 |         )
65 | 
66 | add_library(kaldi_readers SHARED)
67 | 
68 | 
69 | # -fPIC
70 | set_property(TARGET kaldi_readers PROPERTY POSITION_INDEPENDENT_CODE ON)
71 | target_sources(kaldi_readers
72 |         PRIVATE
73 |         ${SOURCE_FILES}
74 |         )
75 | 
76 | target_include_directories(kaldi_readers
77 |         PRIVATE
78 |         ${TF_INC}
79 |         ${TF_INC}/external/nsync/public
80 |         )
81 | target_link_libraries(kaldi_readers
82 |         PRIVATE
83 |         ${TF_LIB}/libtensorflow_framework.so
84 |         )
85 | 
86 | 


--------------------------------------------------------------------------------
/example/data/ali.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/ali.ark


--------------------------------------------------------------------------------
/example/data/ali.ark.txt:
--------------------------------------------------------------------------------
1 | ali-1 25 70 3013 4 0 222 444 111
2 | ali-2 1 2 3 4 5 6 7 8


--------------------------------------------------------------------------------
/example/data/ali.scp:
--------------------------------------------------------------------------------
1 | ali-1 ali.ark:6
2 | ali-2 ali.ark:59
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp1.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp1.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp1.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp1.ark:6
2 | mat-2 matrix-comp1.ark:50
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp2.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp2.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp2.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp2.ark:6
2 | mat-2 matrix-comp2.ark:73
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp3.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp3.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp3.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp3.ark:6
2 | mat-2 matrix-comp3.ark:50
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp4.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp4.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp4.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp4.ark:6
2 | mat-2 matrix-comp4.ark:50
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp5.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp5.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp5.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp5.ark:6
2 | mat-2 matrix-comp5.ark:42
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp6.ark:
--------------------------------------------------------------------------------
1 | mat-1  BCM3       C      mat-2  BCM3       C      


--------------------------------------------------------------------------------
/example/data/matrix-comp6.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp6.ark:6
2 | mat-2 matrix-comp6.ark:42
3 | 


--------------------------------------------------------------------------------
/example/data/matrix-comp7.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp7.ark


--------------------------------------------------------------------------------
/example/data/matrix-comp7.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix-comp7.ark:6
2 | mat-2 matrix-comp7.ark:42
3 | 


--------------------------------------------------------------------------------
/example/data/matrix.ark.txt:
--------------------------------------------------------------------------------
1 | mat-1 [
2 | 1.0 2.0 3.0 4.0
3 | 2.0 3.0 4.0 5.0 ]
4 | mat-2 [
5 | 7.0 7.0 7.0 7.0
6 | 2.0 3.0 4.0 5.0 ]
7 | 


--------------------------------------------------------------------------------
/example/data/matrix.nocompress.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix.nocompress.ark


--------------------------------------------------------------------------------
/example/data/matrix.nocompress.scp:
--------------------------------------------------------------------------------
1 | mat-1 matrix.nocompress.ark:6
2 | mat-2 matrix.nocompress.ark:59
3 | 


--------------------------------------------------------------------------------
/example/data/matrix.scp:
--------------------------------------------------------------------------------
 1 | mat-1 matrix-comp1.ark:6
 2 | mat-2 matrix-comp1.ark:50
 3 | mat-1 matrix-comp2.ark:6
 4 | mat-2 matrix-comp2.ark:73
 5 | mat-1 matrix-comp3.ark:6
 6 | mat-2 matrix-comp3.ark:50
 7 | mat-1 matrix-comp4.ark:6
 8 | mat-2 matrix-comp4.ark:50
 9 | mat-1 matrix-comp5.ark:6
10 | mat-2 matrix-comp5.ark:42
11 | mat-1 matrix-comp6.ark:6
12 | mat-2 matrix-comp6.ark:42
13 | mat-1 matrix-comp7.ark:6
14 | mat-2 matrix-comp7.ark:42
15 | mat-1 matrix.nocompress.ark:6
16 | mat-2 matrix.nocompress.ark:59
17 | 


--------------------------------------------------------------------------------
/example/data/post.ark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/post.ark


--------------------------------------------------------------------------------
/example/data/post.ark.txt:
--------------------------------------------------------------------------------
1 | post-1 [ 25 1 ] [ 70 1 ] [ 3013 1 ] [ 4 1 ] [ 0 1 ] [ 222 1 ] [ 444 1 ] [ 111 1 ]
2 | post-2 [ 1 1 ] [ 2 1 ] [ 3 1 ] [ 4 1 ] [ 5 1 ] [ 6 1 ] [ 7 1 ] [ 8 1 ]


--------------------------------------------------------------------------------
/example/data/post.scp:
--------------------------------------------------------------------------------
1 | post-1 post.ark:7
2 | post-2 post.ark:141
3 | 


--------------------------------------------------------------------------------
/example/data/ref.txt:
--------------------------------------------------------------------------------
 1 | comp1
 2 | mat-1  [
 3 |   1 2.000015 2.999969 3.999985 
 4 |   2.000015 2.999969 3.999985 5 ]
 5 | mat-2  [
 6 |   7 7 7 7 
 7 |   2 3 4 5 ]
 8 |   
 9 | comp2
10 | mat-1  [
11 |   1 2.000015 2.999969 3.999985 
12 |   2.000015 3 4 5 ]
13 | mat-2  [
14 |   7 7 7 7 
15 |   2 3 4 5 ]
16 |   
17 | comp3
18 | mat-1  [
19 |   1 2.000015 2.999969 3.999985 
20 |   2.000015 2.999969 3.999985 5 ]
21 | mat-2  [
22 |   7 7 7 7 
23 |   2 3 4 5 ]
24 | 
25 | comp4
26 | mat-1  [
27 |   1 2 3 4 
28 |   2 3 4 5 ]
29 | mat-2  [
30 |   7 7 7 7 
31 |   2 3 4 5 ]
32 | 
33 | comp5
34 | mat-1  [
35 |   1 2.003922 2.992157 3.996078 
36 |   2.003922 2.992157 3.996078 5 ]
37 | mat-2  [
38 |   7 7 7 7 
39 |   2 3 4 5 ]
40 | 
41 | comp6
42 | mat-1  [
43 |   1 2 3 4 
44 |   2 3 4 5 ]
45 | mat-2  [
46 |   7 7 7 7 
47 |   2 3 4 5 ]
48 | 
49 | comp7
50 | mat-1  [
51 |   1 1 1 1 
52 |   1 1 1 1 ]
53 | mat-2  [
54 |   1 1 1 1 
55 |   1 1 1 1 ]
56 | 
57 | nocomp
58 | mat-1  [
59 |   1 2 3 4 
60 |   2 3 4 5 ]
61 | mat-2  [
62 |   7 7 7 7 
63 |   2 3 4 5 ]
64 | 


--------------------------------------------------------------------------------
/example/read-ali.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | kaldi_module = tf.load_op_library("../cmake-build-release/libkaldi_readers.so")
 3 | 
 4 | def main():
 5 |     value_rspecific = "./data/ali.ark:6"
 6 |     rspec = tf.constant(value_rspecific, tf.string)
 7 |     ali_raw_value = kaldi_module.read_kaldi_post_and_ali(rspec, is_reading_post=False)
 8 |     ali_value = kaldi_module.decode_kaldi_ali(ali_raw_value, tf.int32, is_reading_post=False, merge=False)
 9 |     ali_value.set_shape([None])
10 |     sess = tf.Session()
11 |     sess.run(tf.global_variables_initializer())
12 |     ali = sess.run(ali_value)
13 |     print(ali.shape)
14 |     print(ali)
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     main()
19 | 


--------------------------------------------------------------------------------
/example/read-compressed-matrix.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | kaldi_module = tf.load_op_library("../cmake-build-release/libkaldi_readers.so")
 3 | 
 4 | def main():
 5 |     value_rspecific = "./data/matrix.compressed.ark:6"
 6 |     rspec = tf.constant(value_rspecific, tf.string)
 7 |     #feats_raw_value = kaldi_module.read_kaldi_matrix(rspec)
 8 |     #feats_value = kaldi_module.decode_kaldi_matrix(feats_raw_value, tf.float32)
 9 |     feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=0, right_padding=0)
10 |     feats_value.set_shape([None, 4])
11 |     sess = tf.Session()
12 |     sess.run(tf.global_variables_initializer())
13 |     feats = sess.run(feats_value)
14 |     print(feats.shape)
15 |     print(feats)
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     main()
20 | 


--------------------------------------------------------------------------------
/example/read-matrix.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import tensorflow as tf
 3 | kaldi_module = tf.load_op_library("../../cmake-build-debug/libkaldi_readers.so")
 4 | 
 5 | def main():
 6 |     if len(sys.argv) != 2:
 7 |         print("Usage: python read-matrix.py /path/to/filename.scp")
 8 |         return 1
 9 |     scpfile = sys.argv[1]
10 |     with open(scpfile) as fin:
11 |         scplist = fin.readlines()
12 |     scplist = [scpitem.strip().split()[1] for scpitem in scplist]
13 |     value_rspecific = "./data/matrix.nocompress.ark:59"
14 |     rspec = tf.placeholder(tf.string)
15 |     feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=3, right_padding=4)
16 |     #feats_value.set_shape([None, 4])
17 |     feats_value.set_shape([None, None])
18 |     sess = tf.Session()
19 |     sess.run(tf.global_variables_initializer())
20 |     for rspec_value in scplist:
21 |         feats = sess.run(feats_value, feed_dict={rspec: rspec_value})
22 |         print(rspec_value)
23 |         print(feats.shape)
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     main()
28 | 


--------------------------------------------------------------------------------
/example/read-post.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | kaldi_module = tf.load_op_library("../build/libkaldi_readers.so")
 3 | 
 4 | def main():
 5 |     value_rspecific = "./data/post.ark:141"
 6 |     rspec = tf.constant(value_rspecific, tf.string)
 7 |     ali_raw_value = kaldi_module.read_kaldi_post_and_ali(rspec, is_reading_post=True)
 8 |     ali_value = kaldi_module.decode_kaldi_ali(ali_raw_value, tf.int32, is_reading_post=True)
 9 |     ali_value.set_shape([None])
10 |     sess = tf.Session()
11 |     sess.run(tf.global_variables_initializer())
12 |     ali = sess.run(ali_value)
13 |     print(ali.shape)
14 |     print(ali)
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     main()
19 | 


--------------------------------------------------------------------------------
/example/read-uncompressed-matrix.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | kaldi_module = tf.load_op_library("../build/libkaldi_readers.so")
 3 | 
 4 | def main():
 5 |     value_rspecific = "./data/matrix.nocompress.ark:59"
 6 |     rspec = tf.constant(value_rspecific, tf.string)
 7 |     feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=3, right_padding=4)
 8 |     feats_value.set_shape([None, 4])
 9 |     sess = tf.Session()
10 |     sess.run(tf.global_variables_initializer())
11 |     feats = sess.run(feats_value)
12 |     print(feats.shape)
13 |     print(feats)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     main()
18 | 


--------------------------------------------------------------------------------
/kaldi-ali.cc:
--------------------------------------------------------------------------------
#include <cstring>
#include <memory>
#include <regex>

#include "tensorflow/core/framework/reader_base.h"
#include "tensorflow/core/framework/reader_op_kernel.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/io/buffered_inputstream.h"
#include "tensorflow/core/lib/io/random_inputstream.h"
#include "tensorflow/core/lib/io/zlib_compression_options.h"
#include "tensorflow/core/lib/io/zlib_inputstream.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"

#include "shape-funcs.hh"
 14 | 
 15 | namespace tensorflow {
 16 |     using shape_util::ScalarInputsAndOutputs;
 17 |     using shape_util::TwoElementOutput;
 18 | 
 19 |     static Status ReadKaldiPostAndAli(Env* env, const string& ark_path, uint64 ark_offset, bool is_reading_post, string* contents) {
 20 |         enum { kBufferSize = 256 << 10 /* 256 kB */ };
 21 | 
 22 |         std::unique_ptr<RandomAccessFile> file_;
 23 |         std::unique_ptr<io::InputStreamInterface> buffered_inputstream_;
 24 | 
 25 |         TF_RETURN_IF_ERROR(env->NewRandomAccessFile(ark_path, &file_));
 26 |         buffered_inputstream_.reset(
 27 |                 new io::BufferedInputStream(file_.get(), kBufferSize));
 28 |         TF_RETURN_IF_ERROR(buffered_inputstream_->SkipNBytes(ark_offset));
 29 | 
 30 |         // Actural reading start from here
 31 |         string binary;
 32 |         TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(2, &binary));
 33 |         CHECK_EQ(binary[0], '\0');
 34 |         CHECK_EQ(binary[1], 'B');
 35 |         string header_buffer;
 36 |         TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(1, &header_buffer));
 37 |         if (header_buffer[0] == '\4') {
 38 |             // This is a vector of int
 39 |             string size_str;
 40 |             buffered_inputstream_->ReadNBytes(4, &size_str);
 41 |             int32 size = *reinterpret_cast<const int32*>(size_str.data());
 42 |             string data;
 43 |             if (is_reading_post) {
 44 |                 for (int32 outer_vec_idx = 0; outer_vec_idx < size; outer_vec_idx++) {
 45 |                     // <1> <4> [<1> <4> <1> <4>] [<1> <4> <1> <4>]
 46 |                     string inner_size_str;
 47 |                     buffered_inputstream_->ReadNBytes(5, &inner_size_str);
 48 |                     int32 inner_size = *reinterpret_cast<const int32 *>(inner_size_str.data() + 1);
 49 |                     string inner_vec_data;
 50 |                     buffered_inputstream_->ReadNBytes(inner_size * 10, &inner_vec_data);
 51 |                     data += inner_size_str + inner_vec_data;
 52 |                 }
 53 |             } else {
 54 |                 TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(size * 5, &data));
 55 |             }
 56 |             *contents = header_buffer + size_str + data;
 57 |         } else {
 58 |             return Status(error::UNAVAILABLE, "Unknown Kaldi Post or Ali: " + header_buffer);
 59 |         }
 60 |         return Status::OK();
 61 |     }
 62 | 
 63 |     class ReadKaldiPostAndAliOp : public OpKernel {
 64 |     public:
 65 |         using OpKernel::OpKernel;
 66 |         explicit ReadKaldiPostAndAliOp(OpKernelConstruction *context)
 67 |                 :OpKernel(context),
 68 |                  id_pat_("^(\\S+):(\\d+)")
 69 |         {
 70 |             OP_REQUIRES_OK(context, context->GetAttr("is_reading_post", &is_reading_post_));
 71 |         }
 72 |         void Compute(OpKernelContext* context) override {
 73 | 
 74 |             const Tensor* input;
 75 | 
 76 |             OP_REQUIRES_OK(context, context->input("scpline", &input));
 77 |             OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()),
 78 |                         errors::InvalidArgument(
 79 |                                 "Input filename tensor must be scalar, but had shape: ",
 80 |                                 input->shape().DebugString()));
 81 | 
 82 |             Tensor* output = nullptr;
 83 |             OP_REQUIRES_OK(context, context->allocate_output("contents",
 84 |                                                              TensorShape({}), &output));
 85 |             const std::regex id_pat("^(\\S+):(\\d+)");
 86 |             std::smatch m;
 87 |             string half_scp_line = input->scalar<string>()();
 88 |             bool matched = std::regex_search(half_scp_line, m, id_pat);
 89 |             OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line));
 90 |             string ark_path = m[1];
 91 |             string ark_offset_str = m[2];
 92 |             uint64 ark_offset = std::stoull(ark_offset_str);
 93 | 
 94 |             OP_REQUIRES_OK(context,
 95 |                            ReadKaldiPostAndAli(context->env(), ark_path, ark_offset, is_reading_post_,
 96 |                                                &output->scalar<string>()()));
 97 |         }
 98 |     private:
 99 |         bool is_reading_post_;
100 |         const std::regex id_pat_;
101 |     };
102 |     REGISTER_KERNEL_BUILDER(Name("ReadKaldiPostAndAli").Device(DEVICE_CPU), ReadKaldiPostAndAliOp);
103 | 
104 |     REGISTER_OP("ReadKaldiPostAndAli")
105 |             .Attr("is_reading_post: bool")
106 |             .Input("scpline: string")
107 |             .Output("contents: string")
108 |             .SetShapeFn(ScalarInputsAndOutputs)
109 |             .Doc(R"doc(
110 | Reads and outputs the entire contents of the input kaldi post or ali ark filename.
111 | 
112 | scpline: scalar. /path/to/ark.file:12345
113 | )doc");
114 | 
115 |     class DecodeKaldiAliOp : public OpKernel {
116 |     public:
117 |         explicit DecodeKaldiAliOp(OpKernelConstruction* context) : OpKernel(context) {
118 |             OP_REQUIRES_OK(context, context->GetAttr("out_type", &out_type_));
119 |             OP_REQUIRES_OK(context, context->GetAttr("is_reading_post", &is_reading_post_));
120 |             OP_REQUIRES_OK(context, context->GetAttr("merge", &merge_));
121 |         }
122 | 
123 |         void Compute(OpKernelContext* context) override {
124 |             const auto& input = context->input(0);
125 |             int64 str_size = -1;
126 |             auto flat_in = input.flat<string>();
127 |             OP_REQUIRES(context, flat_in.size() == 1,
128 |                         errors::InvalidArgument(
129 |                                 "DecodeKaldiAliOp requires input string size = 1"
130 |                         )
131 |             );
132 |             const string& in_str = flat_in(0);
133 |             str_size = in_str.size();
134 | 
135 |             const char* in_data = reinterpret_cast<const char*>(flat_in(0).data());
136 |             TensorShape out_shape;
137 |             int32 num_elem = *reinterpret_cast<const int32*>(in_data + 1);
138 |             if (!merge_) {
139 |                 out_shape.AddDim(num_elem);
140 |             } else {
141 |                 int32 prev_elem = -1;
142 |                 int32 count = 0;
143 |                 const char* p = in_data + 5;
144 |                 for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx ++) {
145 |                     int32 curr_elem;
146 |                     if (is_reading_post_) {
147 |                         curr_elem = *reinterpret_cast<const int32*>(p + 5 + 1);
148 |                         p += 15;
149 |                     } else {
150 |                         curr_elem = *reinterpret_cast<const int32*>(p + 1);
151 |                         p += 5;
152 |                     }
153 |                     if (curr_elem != prev_elem) {
154 |                         count ++;
155 |                         prev_elem = curr_elem;
156 |                     }
157 |                 }
158 |                 out_shape.AddDim(count);
159 |             }
160 | 
161 |             if (str_size == -1 || str_size == 0) {  // Empty input
162 |                 Tensor* output_tensor = nullptr;
163 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape,
164 |                                                                  &output_tensor));
165 |                 return;
166 |             }
167 | 
168 |             Tensor* output_tensor = nullptr;
169 |             OP_REQUIRES_OK(
170 |                     context, context->allocate_output("output", out_shape, &output_tensor));
171 |             auto out = output_tensor->flat<int32>();
172 | 
173 |             int32* out_data = out.data();
174 |             const char* in_bytes = in_data + 5;
175 |             if (!merge_) {
176 |                 if (is_reading_post_) {
177 |                     int32 prev_elem = -1;
178 |                     for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) {
179 |                         out_data[frame_idx] = *reinterpret_cast<const int32 *>(in_bytes + 5 + 1);
180 |                         in_bytes += 15;
181 |                     }
182 |                 } else {
183 |                     for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) {
184 |                         out_data[frame_idx] = *reinterpret_cast<const int32 *>(in_bytes + 1);
185 |                         in_bytes += 5;
186 |                     }
187 |                 }
188 |             } else {
189 |                 int32 prev_elem = -1;
190 |                 int32 count = 0;
191 |                 for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) {
192 |                     int32 curr_elem;
193 |                     if (is_reading_post_) {
194 |                         curr_elem = *reinterpret_cast<const int32*>(in_bytes + 5 + 1);
195 |                         in_bytes += 15;
196 |                     } else {
197 |                         curr_elem = *reinterpret_cast<const int32*>(in_bytes + 1);
198 |                         in_bytes += 5;
199 |                     }
200 |                     if (curr_elem != prev_elem) {
201 |                         out_data[count] = curr_elem;
202 |                         count ++;
203 |                         prev_elem = curr_elem;
204 |                     }
205 |                 }
206 |             }
207 |         }
208 | 
209 |     private:
210 |         bool is_reading_post_;
211 |         bool merge_;
212 |         DataType out_type_;
213 | 
214 |     };
215 | 
216 |     REGISTER_KERNEL_BUILDER(Name("DecodeKaldiAli").Device(DEVICE_CPU), DecodeKaldiAliOp);
217 | 
218 |     REGISTER_OP("DecodeKaldiAli")
219 |             .Input("bytes: string")
220 |             .Output("output: out_type")
221 |             .Attr("out_type: {int32}")
222 |             .Attr("is_reading_post: bool")
223 |             .Attr("merge: bool")
224 |             .SetShapeFn(shape_inference::UnknownShape)
225 |             .Doc(R"doc(
226 | Reinterpret the bytes of a string as a kaldi ali
227 | )doc");
228 | 
229 | 
230 | }  // namespace tensorflow
231 | 


--------------------------------------------------------------------------------
/kaldi-matrix-direct.cc:
--------------------------------------------------------------------------------
  1 | #include <memory>
  2 | #include <regex>
  3 | #include "tensorflow/core/framework/reader_base.h"
  4 | #include "tensorflow/core/framework/reader_op_kernel.h"
  5 | #include "tensorflow/core/lib/core/errors.h"
  6 | #include "tensorflow/core/lib/io/buffered_inputstream.h"
  7 | #include "tensorflow/core/lib/io/random_inputstream.h"
  8 | #include "tensorflow/core/lib/strings/strcat.h"
  9 | #include "tensorflow/core/platform/env.h"
 10 | #include "tensorflow/core/framework/common_shape_fns.h"
 11 | #include "shape-funcs.hh"
 12 | 
 13 | namespace tensorflow {
 14 |     using shape_util::ScalarInputsAndOutputs;
 15 |     using shape_util::TwoElementOutput;
 16 | 
 17 |     class ReadAndDecodeKaldiMatrixOp : public OpKernel {
 18 |     public:
 19 | 
 20 |         using OpKernel::OpKernel;
 21 | 
 22 |         explicit ReadAndDecodeKaldiMatrixOp(OpKernelConstruction* context): OpKernel(context) {
 23 |             OP_REQUIRES_OK(context, context->GetAttr("left_padding", &left_padding_));
 24 |             OP_REQUIRES_OK(context, context->GetAttr("right_padding", &right_padding_));
 25 |         }
 26 |         void Compute(OpKernelContext* context) override {
 27 |             // Parses "scpline" (<ark-path>:<offset>), reads the binary matrix stored there and outputs it as floats with padding rows.
 28 |             const Tensor* input;
 29 |             OP_REQUIRES_OK(context, context->input("scpline", &input));
 30 |             OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()),
 31 |                         errors::InvalidArgument(
 32 |                                 "Input filename tensor must be scalar, but had shape: ",
 33 |                                 input->shape().DebugString()));
 34 | 
 35 |             static const std::regex id_pat("^(\\S+):(\\d+)");  // built once; constructing a regex on every call is costly
 36 |             std::smatch m;
 37 |             string half_scp_line = input->scalar<string>()();
 38 |             bool matched = std::regex_search(half_scp_line, m, id_pat);
 39 |             OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line));
 40 |             string ark_path = m[1];
 41 |             string ark_offset_str = m[2];
 42 |             uint64 ark_offset = std::stoull(ark_offset_str);
 43 |             // NOTE(review): header fields below are taken from the scratch buffers; this assumes Read() always fills scratch - confirm.
 44 |             std::unique_ptr<RandomAccessFile> file;
 45 |             OP_REQUIRES_OK(context, context->env()->NewRandomAccessFile(ark_path, &file));
 46 |             uint64 rel_offset = 0;
 47 |             StringPiece data_holder;
 48 |             char data_header[10];
 49 |             OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, 2, &data_holder, data_header));
 50 |             rel_offset += 2;
 51 |             bool is_binary = (data_header[0] == '\0' && data_header[1] == 'B');
 52 |             OP_REQUIRES(context, is_binary, Status(error::INVALID_ARGUMENT,
 53 |                                                    "We only support binary format ark."));
 54 |             // The next three bytes tag the matrix type: "FM ", "CM ", "CM2" or "CM3".
 55 |             OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, 3, &data_holder, data_header));
 56 |             rel_offset += 3;
 57 |             TensorShape out_shape;
 58 |             // Padding replicates the first/last real row, so a padded matrix needs at least one row; dims must not be negative.
 59 |             auto dims_ok = [this](int64 rows, int64 cols) { return rows >= 0 && cols >= 0 && (rows > 0 || (left_padding_ <= 0 && right_padding_ <= 0)); };
 60 |             if (data_holder == "FM ") {
 61 |                 int8 row_nbyte;
 62 |                 int32 row;
 63 |                 int8 col_nbyte;
 64 |                 int32 col;
 65 |                 OP_REQUIRES_OK(context,
 66 |                                file->Read(ark_offset + rel_offset, 1, &data_holder,
 67 |                                           reinterpret_cast<char*>(&row_nbyte)));
 68 |                 rel_offset += 1;
 69 |                 OP_REQUIRES_OK(context,
 70 |                                file->Read(ark_offset + rel_offset, 4, &data_holder,
 71 |                                           reinterpret_cast<char*>(&row)));
 72 |                 rel_offset += 4;
 73 |                 OP_REQUIRES_OK(context,
 74 |                                file->Read(ark_offset + rel_offset, 1, &data_holder,
 75 |                                           reinterpret_cast<char*>(&col_nbyte)));
 76 |                 rel_offset += 1;
 77 |                 OP_REQUIRES_OK(context,
 78 |                                file->Read(ark_offset + rel_offset, 4, &data_holder,
 79 |                                           reinterpret_cast<char*>(&col)));
 80 |                 rel_offset += 4;
 81 |                 OP_REQUIRES(context, dims_ok(row, col),
 82 |                             errors::InvalidArgument("Invalid Kaldi matrix shape for padding: ", half_scp_line));
 83 |                 out_shape.AddDim(left_padding_ + row + right_padding_);
 84 |                 out_shape.AddDim(col);
 85 |                 Tensor* output_tensor = nullptr;
 86 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape, &output_tensor));
 87 |                 float* out_data = output_tensor->flat<float>().data();
 88 | 
 89 |                 // Read the float payload straight into the rows after the left padding; sizeof first keeps the product in size_t.
 90 |                 OP_REQUIRES_OK(context,
 91 |                                file->Read(ark_offset + rel_offset, sizeof(float) * row * col, &data_holder,
 92 |                         reinterpret_cast<char*>(out_data + left_padding_ * col)));
 93 | 
 94 |                 for (int64 i = 0; i < left_padding_; i ++) {
 95 |                     for (int j = 0; j < col; j ++) {
 96 |                         *(out_data + i * col + j) = *(out_data + left_padding_ * col + j);
 97 |                     }
 98 |                 }
 99 |                 for (int64 i = left_padding_ + row; i < left_padding_ + row + right_padding_; i ++) {
100 |                     for (int j = 0; j < col; j ++) {
101 |                         *(out_data + i * col + j) = *(out_data + (left_padding_ + row - 1) * col + j);
102 |                     }
103 |                 }
104 | 
105 |             } else if (data_holder == "CM ") {
106 |                 GlobalHeader h;
107 |                 h.format = 1;
108 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder,
109 |                         reinterpret_cast<char*>(&h) + 4));
110 |                 rel_offset += (sizeof(h) - 4);
111 |                 OP_REQUIRES(context, dims_ok(h.num_rows, h.num_cols),
112 |                             errors::InvalidArgument("Invalid Kaldi matrix shape for padding: ", half_scp_line));
113 |                 out_shape.AddDim(left_padding_ + h.num_rows + right_padding_);
114 |                 out_shape.AddDim(h.num_cols);
115 |                 Tensor* output_tensor = nullptr;
116 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape, &output_tensor));
117 | 
118 |                 uint64 remaining_size = h.num_cols * (h.num_rows + sizeof(PerColHeader));
119 |                 string compressed_buffer;
120 |                 compressed_buffer.resize(remaining_size);
121 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder,
122 |                                                    &compressed_buffer[0]));
123 |                 rel_offset += remaining_size;
124 | 
125 |                 float* out_data = output_tensor->flat<float>().data();
126 |                 const char* in_data = compressed_buffer.data();
127 | 
128 |                 const PerColHeader *per_col_header = reinterpret_cast<const PerColHeader*>(in_data);
129 |                 const uint8 *in_data_bytes = reinterpret_cast<const uint8*>(per_col_header + h.num_cols);
130 |                 for (int64 i = 0; i < h.num_cols; i++, per_col_header++) {
131 |                     float   p0 = Uint16ToFloat(h, per_col_header->percentile_0),
132 |                             p25 = Uint16ToFloat(h, per_col_header->percentile_25),
133 |                             p75 = Uint16ToFloat(h, per_col_header->percentile_75),
134 |                             p100 = Uint16ToFloat(h, per_col_header->percentile_100);
135 | 
136 |                     for (int64 j = left_padding_; j < left_padding_ + h.num_rows; j ++, in_data_bytes ++) {
137 |                         float f = CharToFloat(p0, p25, p75, p100, *in_data_bytes);
138 |                         *(out_data + j * h.num_cols + i) = f;
139 |                     }
140 |                 }
141 | 
142 |                 for (int64 i = 0; i < left_padding_; i ++) {
143 |                     for (int j = 0; j < h.num_cols; j ++) {
144 |                         *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j);
145 |                     }
146 |                 }
147 |                 for (int64 i = left_padding_ + h.num_rows; i < left_padding_ + h.num_rows + right_padding_; i ++) {
148 |                     for (int j = 0; j < h.num_cols; j ++) {
149 |                         *(out_data + i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j);
150 |                     }
151 |                 }
152 |             } else if (data_holder == "CM2") {
153 |                 rel_offset ++;  // skip the single byte that follows the three-byte tag
154 |                 GlobalHeader h;
155 |                 h.format = 2;
156 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder,
157 |                                                    reinterpret_cast<char*>(&h) + 4));
158 |                 rel_offset += (sizeof(h) - 4);
159 |                 OP_REQUIRES(context, dims_ok(h.num_rows, h.num_cols),
160 |                             errors::InvalidArgument("Invalid Kaldi matrix shape for padding: ", half_scp_line));
161 |                 out_shape.AddDim(left_padding_ + h.num_rows + right_padding_);
162 |                 out_shape.AddDim(h.num_cols);
163 |                 Tensor* output_tensor = nullptr;
164 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape, &output_tensor));
165 | 
166 |                 uint64 size = DataSize(h);
167 |                 uint64 remaining_size = size - sizeof(GlobalHeader);
168 |                 string compressed_buffer;
169 |                 compressed_buffer.resize(remaining_size);
170 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder,
171 |                                                    &compressed_buffer[0]));
172 |                 rel_offset += remaining_size;
173 | 
174 |                 float* out_data = output_tensor->flat<float>().data();
175 |                 const char* in_data = compressed_buffer.data();
176 | 
177 |                 const uint16 *in_data_uint16 = reinterpret_cast<const uint16*>(in_data);
178 |                 float min_value = h.min_value;
179 |                 float increment = h.range * (1.0 / 65535.0);
180 |                 for (int64 i = left_padding_; i < left_padding_ + h.num_rows; i++) {
181 |                     for (int64 j = 0; j < h.num_cols; j++) {
182 |                         *(out_data + i * h.num_cols + j) = min_value + in_data_uint16[j] * increment;
183 |                     }
184 |                     in_data_uint16 += h.num_cols;
185 |                 }
186 |                 for (int64 i = 0; i < left_padding_; i ++) {
187 |                     for (int j = 0; j < h.num_cols; j ++) {
188 |                         *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j);
189 |                     }
190 |                 }
191 |                 for (int64 i = left_padding_ + h.num_rows; i < left_padding_ + h.num_rows + right_padding_; i ++) {
192 |                     for (int j = 0; j < h.num_cols; j ++) {
193 |                         *(out_data + i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j);
194 |                     }
195 |                 }
196 |             } else if (data_holder == "CM3") {
197 |                 rel_offset ++;  // skip the single byte that follows the three-byte tag
198 |                 GlobalHeader h;
199 |                 h.format = 3;
200 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder,
201 |                                                    reinterpret_cast<char*>(&h) + 4));
202 |                 rel_offset += (sizeof(h) - 4);
203 |                 OP_REQUIRES(context, dims_ok(h.num_rows, h.num_cols),
204 |                             errors::InvalidArgument("Invalid Kaldi matrix shape for padding: ", half_scp_line));
205 |                 out_shape.AddDim(left_padding_ + h.num_rows + right_padding_);
206 |                 out_shape.AddDim(h.num_cols);
207 |                 Tensor* output_tensor = nullptr;
208 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape, &output_tensor));
209 | 
210 |                 uint64 size = DataSize(h);
211 |                 uint64 remaining_size = size - sizeof(GlobalHeader);
212 |                 string compressed_buffer;
213 |                 compressed_buffer.resize(remaining_size);
214 |                 OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder,
215 |                                                    &compressed_buffer[0]));
216 |                 rel_offset += remaining_size;
217 | 
218 |                 float* out_data = output_tensor->flat<float>().data();
219 |                 const char* in_data = compressed_buffer.data();
220 | 
221 |                 float min_value = h.min_value, increment = h.range * (1.0 / 255.0);
222 |                 const uint8 *in_data_bytes = reinterpret_cast<const uint8*>(in_data);
223 |                 for (int64 i = left_padding_; i < left_padding_ + h.num_rows; i++) {
224 |                     for (int64 j = 0; j < h.num_cols; j ++) {
225 |                         *(out_data + i * h.num_cols + j) = min_value + in_data_bytes[j] * increment;
226 |                     }
227 |                     in_data_bytes += h.num_cols;
228 |                 }
229 |                 for (int64 i = 0; i < left_padding_; i ++) {
230 |                     for (int j = 0; j < h.num_cols; j ++) {
231 |                         *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j);
232 |                     }
233 |                 }
234 |                 for (int64 i = left_padding_ + h.num_rows; i < left_padding_ + h.num_rows + right_padding_; i ++) {
235 |                     for (int j = 0; j < h.num_cols; j ++) {
236 |                         *(out_data + i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j);
237 |                     }
238 |                 }
239 |             } else {
240 |                 OP_REQUIRES_OK(context, Status(error::UNAVAILABLE,
241 |                                                "Unknown Kaldi Matrix:" + data_holder.ToString() +
242 |                                                " When reading \"" + half_scp_line + "\"" +
243 |                                                " Ark: " + ark_path +
244 |                                                " OFFSET: " + std::to_string(ark_offset) ));
245 |             }
246 |         }
247 |     private:
248 |         int64 left_padding_, right_padding_;
249 |         enum DataFormat {  // compressed-matrix layouts; the values are what Compute() stores in GlobalHeader::format
250 |             kOneByteWithColHeaders = 1,  // per-column header plus one byte per element (see DataSize)
251 |             kTwoByte = 2,  // two bytes per element
252 |             kOneByte = 3  // one byte per element
253 |         };
254 |         struct GlobalHeader {  // Compute() sets `format` itself and reads the four fields after it from the file in one block
255 |             int32 format;     // Represents the enum DataFormat.
256 |             float min_value;  // min_value and range represent the ranges of the integer
257 |             // data in the kTwoByte and kOneByte formats, and the
258 |             // range of the PerColHeader uint16's in the
259 |             // kOneByteWithColheaders format.
260 |             float range;
261 |             int32 num_rows;  // rows stored in the file, before any padding rows are added
262 |             int32 num_cols;  // columns of the matrix
263 |         };
264 |         struct PerColHeader {  // per-column anchor values; each uint16 is turned into a float by Uint16ToFloat
265 |             uint16 percentile_0;  // value decoded for byte 0 (see CharToFloat)
266 |             uint16 percentile_25;  // value decoded for byte 64
267 |             uint16 percentile_75;  // value decoded for byte 192
268 |             uint16 percentile_100;  // value decoded for byte 255
269 |         };
270 |         float Uint16ToFloat(const GlobalHeader &global_header, uint16 value) {
271 |             const float step = global_header.range * 1.52590218966964e-05F;  // range / 65535: float size of one uint16 step
272 |             return global_header.min_value + step * value;
273 |         }
274 |         float CharToFloat(float p0, float p25, float p75, float p100,
275 |                           uint8 value) {
276 |             if (value > 192) {  // top segment: 193..255 interpolates between p75 and p100
277 |                 return p75 + (p100 - p75) * (value - 192) * (1/63.0f);
278 |             }
279 |             if (value > 64) {  // middle segment: 65..192 interpolates between p25 and p75
280 |                 return p25 + (p75 - p25) * (value - 64) * (1/128.0f);
281 |             }
282 |             return p0 + (p25 - p0) * value * (1/64.0f);  // bottom segment: 0..64
283 |         }
284 |         uint64 DataSize(const GlobalHeader& header) {
285 |             // Serialized size in bytes, sizeof(GlobalHeader) included, for header.format.
286 |             // The dimensions are widened first so the products cannot overflow int32.
287 |             const uint64 rows = header.num_rows, cols = header.num_cols;
288 |             switch (static_cast<DataFormat>(header.format)) {
289 |                 case kOneByteWithColHeaders:  // per-column header + one byte per element
290 |                     return sizeof(GlobalHeader) + cols * (sizeof(PerColHeader) + rows);
291 |                 case kTwoByte:  // two bytes per element
292 |                     return sizeof(GlobalHeader) + 2 * rows * cols;
293 |                 default:  // kOneByte and any other value: one byte per element
294 |                     return sizeof(GlobalHeader) + rows * cols;
295 |             }
296 |         }
297 |     };
298 |     REGISTER_KERNEL_BUILDER(Name("ReadAndDecodeKaldiMatrix").Device(DEVICE_CPU), ReadAndDecodeKaldiMatrixOp);
299 | 
300 |     // Op interface: scalar string "scpline" in, float32 matrix out; the shape is unknown until the file is read.
301 |     REGISTER_OP("ReadAndDecodeKaldiMatrix")
302 |             .Input("scpline: string")
303 |             .Attr("left_padding: int")
304 |             .Attr("right_padding: int")
305 |             .Output("output: float32")
306 |             .SetShapeFn(shape_inference::UnknownShape)
307 |             .Doc(R"doc(
308 | Reads the kaldi matrix referenced by a script line and decodes it into a float matrix padded with copies of its first and last rows
309 | )doc");
310 | }
311 | 


--------------------------------------------------------------------------------
/kaldi-matrix.cc:
--------------------------------------------------------------------------------
  1 | /* Reference:
  2 |  * */
  3 | 
  4 | #include <memory>
  5 | #include <regex>
  6 | #include "tensorflow/core/framework/reader_base.h"
  7 | #include "tensorflow/core/framework/reader_op_kernel.h"
  8 | #include "tensorflow/core/lib/core/errors.h"
  9 | #include "tensorflow/core/lib/io/buffered_inputstream.h"
 10 | #include "tensorflow/core/lib/io/random_inputstream.h"
 11 | #include "tensorflow/core/lib/strings/strcat.h"
 12 | #include "tensorflow/core/platform/env.h"
 13 | #include "tensorflow/core/framework/common_shape_fns.h"
 14 | #include "shape-funcs.hh"
 15 | 
 16 | namespace tensorflow {
 17 |     using shape_util::ScalarInputsAndOutputs;
 18 |     using shape_util::TwoElementOutput;
 19 | 
 20 |     static Status ReadKaldiMatrix(Env* env, const string& ark_path, uint64 ark_offset, string* contents) {
 21 |         // Reads the binary Kaldi matrix record that starts at byte `ark_offset` of
 22 |         // `ark_path` and stores it in `*contents` without the leading "\0B" marker.
 23 |         // Returns a non-OK status on I/O errors and for unsupported record types.
 24 |         std::unique_ptr<RandomAccessFile> file_;
 25 |         std::unique_ptr<io::InputStreamInterface> buffered_inputstream_;
 26 |         enum { kBufferSize = 256 << 10 /* 256 kB */ };
 27 |         TF_RETURN_IF_ERROR(env->NewRandomAccessFile(ark_path, &file_));
 28 |         buffered_inputstream_.reset(
 29 |                 new io::BufferedInputStream(file_.get(), kBufferSize));
 30 |         TF_RETURN_IF_ERROR(buffered_inputstream_->SkipNBytes(ark_offset));
 31 | 
 32 |         // Actual reading starts here. A non-binary record is reported as an error instead of aborting the process.
 33 |         string binary;
 34 |         TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(2, &binary));
 35 |         if (binary[0] != '\0' || binary[1] != 'B') {
 36 |             return Status(error::INVALID_ARGUMENT, "We only support binary format ark.");
 37 |         }
 38 |         string header_buffer;
 39 |         TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(3, &header_buffer));
 40 |         if (header_buffer == "CM ") {
 41 |             // format 1: global header, per-column headers, then one byte per element
 42 |             string global_header;
 43 |             uint64 global_header_sz = 4 * 4;
 44 |             uint64 per_col_header_sz = 2 * 4;
 45 |             TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(global_header_sz, &global_header));
 46 |             // Only the dimensions are needed here; the first eight header bytes
 47 |             // (min_value and range) are passed through untouched.
 48 |             int32 num_rows, num_cols;
 49 |             memcpy(&num_rows, global_header.data() + 4 * 2, sizeof(int32));
 50 |             memcpy(&num_cols, global_header.data() + 4 * 3, sizeof(int32));
 51 | 
 52 |             // The global header is already consumed, so read only what follows it.
 53 |             uint64 remaining_size = num_cols * (per_col_header_sz + num_rows);
 54 |             string data;
 55 |             TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(remaining_size, &data));
 56 |             *contents = header_buffer + global_header + data;
 57 |         } else if (header_buffer == "DM ") {
 58 |             return Status(error::UNAVAILABLE, "Kaldi Matrix of double reading is not implemented yet.");
 59 |         } else if (header_buffer == "FM "){
 60 |             // Layout read here: 1 byte, int32 rows, 1 byte, int32 cols, then the float data.
 61 |             string row_and_col;
 62 |             TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(1+4+1+4, &row_and_col));
 63 |             int32 row, col;
 64 |             memcpy(&row, row_and_col.data()+1, sizeof(int32));
 65 |             memcpy(&col, row_and_col.data()+6, sizeof(int32));
 66 |             string data;
 67 |             // sizeof comes first so the byte count is computed in size_t rather than int32.
 68 |             TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(sizeof(float) * row * col, &data));
 69 |             *contents = header_buffer + row_and_col + data;
 70 |         } else {
 71 |             return Status(error::UNAVAILABLE, "Unknown Kaldi Matrix: " + header_buffer);
 72 |         }
 73 |         return Status::OK();
 74 |     }
 75 | 
 76 | 
 77 |     class ReadKaldiMatrixOp : public OpKernel {
 78 |     public:
 79 |         using OpKernel::OpKernel;
 80 |         void Compute(OpKernelContext* context) override {
 81 |             // Input "scpline" must be a scalar string of the form <ark-path>:<byte-offset>.
 82 |             const Tensor* input;
 83 |             OP_REQUIRES_OK(context, context->input("scpline", &input));
 84 |             OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()),
 85 |                         errors::InvalidArgument(
 86 |                                 "Input filename tensor must be scalar, but had shape: ",
 87 |                                 input->shape().DebugString()));
 88 | 
 89 |             Tensor* output = nullptr;
 90 |             OP_REQUIRES_OK(context, context->allocate_output("contents",
 91 |                                                              TensorShape({}), &output));
 92 |             static const std::regex id_pat("^(\\S+):(\\d+)");  // built once; constructing a regex on every call is costly
 93 |             std::smatch m;
 94 |             string half_scp_line = input->scalar<string>()();
 95 |             bool matched = std::regex_search(half_scp_line, m, id_pat);
 96 |             OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line));
 97 |             string ark_path = m[1];
 98 |             string ark_offset_str = m[2];
 99 |             uint64 ark_offset = std::stoull(ark_offset_str);
100 |             // The record bytes are written straight into the scalar output string.
101 |             OP_REQUIRES_OK(context,
102 |                            ReadKaldiMatrix(context->env(), ark_path, ark_offset,
103 |                                            &output->scalar<string>()()));
104 |         }
105 |     };
106 |     REGISTER_KERNEL_BUILDER(Name("ReadKaldiMatrix").Device(DEVICE_CPU), ReadKaldiMatrixOp);
107 |     // Op interface: one scalar string in ("scpline"), one scalar string out ("contents").
108 |     REGISTER_OP("ReadKaldiMatrix")
109 |             .Input("scpline: string")
110 |             .Output("contents: string")
111 |             .SetShapeFn(ScalarInputsAndOutputs)
112 |             .Doc(R"doc(
113 | Reads and outputs the contents of a record of the input kaldi ark filename.
114 | 
115 | scpline: scalar. /path/to/ark.file:12345
116 | )doc");
117 | 
118 |     class DecodeKaldiMatrixOp : public OpKernel {
119 |     public:
120 |         explicit DecodeKaldiMatrixOp(OpKernelConstruction* context) : OpKernel(context) {
121 |             OP_REQUIRES_OK(context, context->GetAttr("out_type", &out_type_));
122 |         }
123 | 
124 |         // Decodes one serialized record ("CM" compressed or "FM" float) into a float tensor [num_rows, num_cols].
125 |         // Header fields sit at unaligned offsets, so they are copied with memcpy; lengths are validated before reading.
126 |         void Compute(OpKernelContext* context) override {
127 |             const auto& input = context->input(0);
128 |             auto flat_in = input.flat<string>();
129 |             OP_REQUIRES(context, flat_in.size() == 1,
130 |                         errors::InvalidArgument("DecodeKaldiArk requires input string size = 1"));
131 |             const string& in_str = flat_in(0);
132 |             const char* in_data = in_str.data();
133 |             const uint64 str_size = in_str.size();
134 | 
135 |             TensorShape out_shape;
136 |             if (str_size == 0) {  // Empty input: keep the legacy rank-0 output.
137 |                 Tensor* output_tensor = nullptr;
138 |                 OP_REQUIRES_OK(context, context->allocate_output("output", out_shape,
139 |                                                                  &output_tensor));
140 |                 return;
141 |             }
142 |             const bool is_compressed = str_size >= 2 && in_data[0] == 'C' && in_data[1] == 'M';
143 |             const bool is_float = str_size >= 2 && in_data[0] == 'F' && in_data[1] == 'M';
144 |             OP_REQUIRES(context, is_compressed || is_float,
145 |                         errors::InvalidArgument("Unknown Kaldi Matrix header"));
146 |             // "CM": 3-byte tag, float min_value, float range, int32 rows, int32 cols.
147 |             // "FM": 3-byte tag, 1 byte, int32 rows, 1 byte, int32 cols.
148 |             const uint64 rows_at = is_compressed ? 3 + 4 * 2 : 3 + 1;
149 |             const uint64 cols_at = is_compressed ? 3 + 4 * 3 : 3 + 1 + 4 + 1;
150 |             const uint64 payload_at = is_compressed ? 3 + 4 * 4 : 3 + 10;
151 |             OP_REQUIRES(context, str_size >= payload_at,
152 |                         errors::InvalidArgument("Truncated Kaldi Matrix header"));
153 |             int32 num_rows = 0, num_cols = 0;
154 |             memcpy(&num_rows, in_data + rows_at, sizeof(int32));
155 |             memcpy(&num_cols, in_data + cols_at, sizeof(int32));
156 |             OP_REQUIRES(context, num_rows >= 0 && num_cols >= 0,
157 |                         errors::InvalidArgument("Negative Kaldi Matrix dimension"));
158 |             const uint64 rows = num_rows, cols = num_cols;
159 |             const uint64 payload_size = is_compressed ? cols * (sizeof(PerColHeader) + rows)
160 |                                                       : rows * cols * sizeof(float);
161 |             OP_REQUIRES(context, str_size - payload_at >= payload_size,
162 |                         errors::InvalidArgument("Truncated Kaldi Matrix data"));
163 | 
164 |             out_shape.AddDim(num_rows);
165 |             out_shape.AddDim(num_cols);
166 |             Tensor* output_tensor = nullptr;
167 |             OP_REQUIRES_OK(
168 |                     context, context->allocate_output("output", out_shape, &output_tensor));
169 |             float* out_data = output_tensor->flat<float>().data();
170 |             if (is_float) {  // The payload is copied verbatim as the float data.
171 |                 memcpy(out_data, in_data + payload_at, payload_size);
172 |                 return;
173 |             }
174 |             GlobalHeader header = {1, 0.0f, 0.0f, num_rows, num_cols};
175 |             memcpy(&header.min_value, in_data + 3 + 4 * 0, sizeof(float));
176 |             memcpy(&header.range, in_data + 3 + 4 * 1, sizeof(float));
177 |             // Per-column headers come first, then one byte per element, column after column.
178 |             const char* col_header_at = in_data + payload_at;
179 |             const uint8* in_data_bytes = reinterpret_cast<const uint8*>(col_header_at + cols * sizeof(PerColHeader));
180 |             for (int64 i = 0; i < num_cols; i++, col_header_at += sizeof(PerColHeader)) {
181 |                 PerColHeader per_col_header;
182 |                 memcpy(&per_col_header, col_header_at, sizeof(PerColHeader));
183 |                 float   p0 = Uint16ToFloat(header, per_col_header.percentile_0),
184 |                         p25 = Uint16ToFloat(header, per_col_header.percentile_25),
185 |                         p75 = Uint16ToFloat(header, per_col_header.percentile_75),
186 |                         p100 = Uint16ToFloat(header, per_col_header.percentile_100);
187 |                 for (int64 j = 0; j < num_rows; j ++, in_data_bytes ++) {
188 |                     *(out_data + j * num_cols + i) = CharToFloat(p0, p25, p75, p100, *in_data_bytes);
189 |                 }
190 |             }
191 |         }
192 | 
193 |     private:
194 |         DataType out_type_;
195 |         struct PerColHeader {
196 |             uint16 percentile_0;
197 |             uint16 percentile_25;
198 |             uint16 percentile_75;
199 |             uint16 percentile_100;
200 |         };
201 |         struct GlobalHeader {
202 |             int32 format;
203 |             float min_value;
204 |             float range;
205 |             int32 num_rows;
206 |             int32 num_cols;
207 |         };
208 |         float Uint16ToFloat(const GlobalHeader &global_header, uint16 value) {
209 |             return global_header.min_value
210 |                    + global_header.range * 1.52590218966964e-05F * value;
211 |         }
212 |         float CharToFloat(float p0, float p25, float p75, float p100,
213 |                           uint8 value) {
214 |             if (value <= 64) {
215 |                 return p0 + (p25 - p0) * value * (1/64.0f);
216 |             } else if (value <= 192) {
217 |                 return p25 + (p75 - p25) * (value - 64) * (1/128.0f);
218 |             } else {
219 |                 return p75 + (p100 - p75) * (value - 192) * (1/63.0f);
220 |             }
221 |         }
222 |     };
223 | 
224 |     REGISTER_KERNEL_BUILDER(Name("DecodeKaldiMatrix").Device(DEVICE_CPU), DecodeKaldiMatrixOp);
225 | 
226 |     // "out_type" only admits float; the output shape is left unknown by the shape function.
227 |     REGISTER_OP("DecodeKaldiMatrix")
228 |             .Input("bytes: string")
229 |             .Output("output: out_type")
230 |             .Attr("out_type: {float}")
231 |             .SetShapeFn(shape_inference::UnknownShape)
232 |             .Doc(R"doc(
233 | Reinterpret the bytes of a string as a kaldi matrix
234 | )doc");
235 | }  // namespace tensorflow
236 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Kaldi ark readers for tensorflow
 2 | 
 3 | ## Introduction
 4 | 
 5 | This project aims to enable reading kaldi ark files into tensorflow. It adds
 6 | the following operators to tensorflow:
 7 | 
 8 | * read_kaldi_matrix(rspecific)
 9 | * decode_kaldi_matrix(data, element-type)
10 | * read_kaldi_post_and_ali(rspecific, is_reading_post(bool))
11 | * decode_kaldi_ali(data, element-type, is_reading_post(bool))
12 | 
13 | For kaldi matrices, only float matrices are supported. Please pass "tf.float32" in
14 | through the element-type argument.
15 | 
16 | For compressed kaldi matrix, only compression method 2 (kSpeechFeature) is supported.
17 | 
18 | For alignments, users need to specify whether posteriors or pdfs are being read via the argument
19 | `is_reading_post`. The operator decode_kaldi_ali produces the alignment pdfs as a
20 | one-dimensional int32 tensor. Please pass "tf.int32" in through the
21 | element-type argument.
22 | 
23 | Contributions are welcome. Feel free to fork and send pull requests or to create issues.
24 | 
25 | ## Build
26 | 
27 | Prerequisites:
28 | 
29 | * Linux
30 | * GCC version >= 5.1 (for use of c++11 regex)
31 | * python with tensorflow installed
32 | 
33 | Steps:
34 | 
35 | 1. git clone <url-of-this-repo>
36 | 2. cd kaldi-reader-standalone
37 | 3. mkdir build && cd build
38 | 4. cmake .. -DPYTHONBIN=/path/to/your/correct/version/of/python
39 | 5. make
40 | 
41 | Then you are all set.
42 | 
43 | ## Usage example
44 | 
45 | ```python
46 |     kaldi_module = kaldi_module = tf.load_op_library("/path/to/this/project/libkaldi_readers.so")
47 |     feats_raw_value = kaldi_module.read_kaldi_matrix("/path/to/somearks/file1.ark:2321")
48 |     feats_value = kaldi_module.decode_kaldi_matrix(feats_raw_value, tf.float32)
49 |     feats_value.set_shape([None, num_dim])
50 | ```
51 | 
52 | There are some examples under the `example` directory. To run them, please modify the library path (in contents of the
53 | python files) to the correct path.
54 | 
55 | 1. cd example
56 | 2. python read-compressed-matrix.py
57 | 3. python read-uncompressed-matrix.py
58 | 4. python read-post.py
59 | 5. python read-ali.py
60 | 
61 | ## Author
62 | 
63 | Fan Ziye
64 | 
65 | ## Reference
66 | 
67 | Kaldi: https://github.com/kaldi-asr/kaldi
68 | Tensorflow: https://www.tensorflow.org/extend/adding_an_op


--------------------------------------------------------------------------------
/shape-funcs.cc:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "shape-funcs.hh"
 3 | #include "tensorflow/core/lib/core/errors.h"
 4 | #include "tensorflow/core/platform/env.h"
 5 | #include "tensorflow/core/framework/common_shape_fns.h"
 6 | 
 7 | namespace shape_util {
 8 |     using tensorflow::shape_inference::DimensionHandle;
 9 |     using tensorflow::shape_inference::InferenceContext;
10 |     using tensorflow::shape_inference::ShapeHandle;
11 | 
12 |     tensorflow::Status ScalarInputsAndOutputs(InferenceContext *c) {
13 |         // Shape function: every input must have rank 0, and every output is declared a scalar.
14 |         ShapeHandle ignored;
15 |         const int input_count = c->num_inputs();
16 |         for (int idx = 0; idx < input_count; ++idx) {
17 |             TF_RETURN_IF_ERROR(c->WithRank(c->input(idx), 0, &ignored));
18 |         }
19 |         for (int idx = 0; idx < c->num_outputs(); ++idx) c->set_output(idx, c->Scalar());
20 |         return tensorflow::Status::OK();
21 |     }
22 | 
23 |     tensorflow::Status TwoElementOutput(InferenceContext *c) {  // shape function; inputs are not inspected
24 |         c->set_output(0, c->Vector(2));  // output 0 is a vector of length 2
25 |         return tensorflow::Status::OK();
26 |     }
27 | } // namespace shape_util


--------------------------------------------------------------------------------
/shape-funcs.hh:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by zyfan on 12/3/17.
 3 | //
 4 | 
 5 | #ifndef KALDI_READER_STANDALONE_SHAPE_FUNCS_HH
 6 | #define KALDI_READER_STANDALONE_SHAPE_FUNCS_HH
 7 | #include "tensorflow/core/platform/env.h"
 8 | #include "tensorflow/core/framework/common_shape_fns.h"
 9 | 
10 | namespace shape_util {
11 |     using tensorflow::shape_inference::InferenceContext;
12 |     // Shape function: requires every input to be rank 0 and marks every output as a scalar.
13 |     tensorflow::Status ScalarInputsAndOutputs(InferenceContext *c);
14 |     // Shape function: marks output 0 as a vector of length 2.
15 |     tensorflow::Status TwoElementOutput(InferenceContext *c);
16 | } // namespace shape_util
17 | 
18 | 
19 | #endif //KALDI_READER_STANDALONE_SHAPE_FUNCS_HH
20 | 


--------------------------------------------------------------------------------