├── CMakeLists.txt ├── example ├── data │ ├── ali.ark │ ├── ali.ark.txt │ ├── ali.scp │ ├── matrix-comp1.ark │ ├── matrix-comp1.scp │ ├── matrix-comp2.ark │ ├── matrix-comp2.scp │ ├── matrix-comp3.ark │ ├── matrix-comp3.scp │ ├── matrix-comp4.ark │ ├── matrix-comp4.scp │ ├── matrix-comp5.ark │ ├── matrix-comp5.scp │ ├── matrix-comp6.ark │ ├── matrix-comp6.scp │ ├── matrix-comp7.ark │ ├── matrix-comp7.scp │ ├── matrix.ark.txt │ ├── matrix.nocompress.ark │ ├── matrix.nocompress.scp │ ├── matrix.scp │ ├── post.ark │ ├── post.ark.txt │ ├── post.scp │ └── ref.txt ├── read-ali.py ├── read-compressed-matrix.py ├── read-matrix.py ├── read-post.py └── read-uncompressed-matrix.py ├── kaldi-ali.cc ├── kaldi-matrix-direct.cc ├── kaldi-matrix.cc ├── readme.md ├── shape-funcs.cc └── shape-funcs.hh
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.8)
project(kaldi_reader_standalone LANGUAGES CXX)

# The sources use <regex>; for GCC the project requires at least 5.1.
# The check is limited to GCC because other compilers use unrelated version
# numbers and were being rejected by mistake.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"
   AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
  message(FATAL_ERROR
    "VERSION OF GCC MUST BE AT LEAST 5.1 (found ${CMAKE_CXX_COMPILER_VERSION})")
endif()

# STRING rather than PATH: a bare interpreter name such as "python3" given as
# -DPYTHONBIN=python3 must stay a name to be looked up on the PATH and must
# not be converted into an absolute path below the working directory.
set(PYTHONBIN "python" CACHE STRING
  "Python interpreter (name or path) with tensorflow installed")

# kaldi_readers_query_tf(<out_var> <python_expression>)
#
# Runs `import tensorflow as tf; print(<python_expression>)` with PYTHONBIN and
# stores the whitespace-stripped stdout in <out_var>. When the interpreter
# fails, a warning carrying its stderr is printed and <out_var> is set to the
# empty string, so the caller decides whether the missing value is fatal.
#
# Example:
#   kaldi_readers_query_tf(DEFAULT_TF_INC "tf.sysconfig.get_include()")
function(kaldi_readers_query_tf out_var expression)
  execute_process(
    COMMAND "${PYTHONBIN}" -c "import tensorflow as tf; print(${expression})"
    OUTPUT_VARIABLE query_output
    ERROR_VARIABLE query_error
    RESULT_VARIABLE query_result
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  if(NOT "${query_result}" STREQUAL "0")
    message(WARNING
      "Querying tensorflow for `${expression}` with '${PYTHONBIN}' failed "
      "(${query_result}):\n${query_error}")
    set(query_output "")
  endif()
  set(${out_var} "${query_output}" PARENT_SCOPE)
endfunction()

kaldi_readers_query_tf(DEFAULT_TF_INC "tf.sysconfig.get_include()")
kaldi_readers_query_tf(DEFAULT_TF_LIB "tf.sysconfig.get_lib()")
kaldi_readers_query_tf(DEFAULT_TF_CXX_FLAGS
  "' '.join(tf.sysconfig.get_compile_flags())")
kaldi_readers_query_tf(DEFAULT_TF_LINK_FLAGS
  "' '.join(tf.sysconfig.get_link_flags())")

# Values given by the user (-DTF_INC=... / -DTF_LIB=...) take precedence; the
# probed defaults are only used to fill in what is missing.
if("${TF_INC}" STREQUAL "")
  set(TF_INC "${DEFAULT_TF_INC}")
endif()
if("${TF_LIB}" STREQUAL "")
  set(TF_LIB "${DEFAULT_TF_LIB}")
endif()

if("${TF_INC}" STREQUAL "" OR "${TF_LIB}" STREQUAL "")
  message(FATAL_ERROR
    "TF_INC and TF_LIB are not set. Please set both variables manually "
    "(-DTF_INC=... -DTF_LIB=...), or point PYTHONBIN at a Python interpreter "
    "that has tensorflow installed.")
endif()

message(STATUS "TF_INC is set: ${TF_INC}")
message(STATUS "TF_LIB is set: ${TF_LIB}")

# tensorflow reports its flags as one space-separated string; turn them into
# CMake lists so that every flag is passed as a separate argument.
separate_arguments(tf_compile_options UNIX_COMMAND "${DEFAULT_TF_CXX_FLAGS}")
separate_arguments(tf_link_items UNIX_COMMAND "${DEFAULT_TF_LINK_FLAGS}")

# Sources are listed directly: add_library() without sources is only accepted
# from CMake 3.11 onwards, while this project declares 3.8 as its minimum.
add_library(kaldi_readers SHARED
  kaldi-matrix.cc
  kaldi-matrix-direct.cc
  kaldi-ali.cc
  shape-funcs.cc
)

set_target_properties(kaldi_readers PROPERTIES
  POSITION_INDEPENDENT_CODE ON  # -fPIC
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
  CXX_EXTENSIONS OFF
)

target_compile_options(kaldi_readers
  PRIVATE
    ${tf_compile_options}
)

target_include_directories(kaldi_readers
  PRIVATE
    "${TF_INC}"
    "${TF_INC}/external/nsync/public"
)

# Link flags reported by tensorflow (-L..., -l...) are handed to the target
# instead of the global CMAKE_SHARED_LINKER_FLAGS.
target_link_libraries(kaldi_readers
  PRIVATE
    ${tf_link_items}
)

# Link the unversioned framework library explicitly when it exists, as before.
# If it does not, rely on the link flags above and fail early when those are
# missing as well.
if(EXISTS "${TF_LIB}/libtensorflow_framework.so")
  target_link_libraries(kaldi_readers
    PRIVATE
      "${TF_LIB}/libtensorflow_framework.so"
  )
elseif(NOT tf_link_items)
  message(FATAL_ERROR
    "Neither ${TF_LIB}/libtensorflow_framework.so nor tensorflow link flags "
    "are available; cannot link against the tensorflow framework library.")
endif()

-------------------------------------------------------------------------------- /example/data/ali.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/ali.ark -------------------------------------------------------------------------------- /example/data/ali.ark.txt: -------------------------------------------------------------------------------- 1 | ali-1 25 70 3013 4 0 222 444 111 2 | ali-2 1 2 3 4 5 6 7 8 -------------------------------------------------------------------------------- /example/data/ali.scp: -------------------------------------------------------------------------------- 1 | ali-1 ali.ark:6 2 | ali-2 ali.ark:59 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp1.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp1.ark -------------------------------------------------------------------------------- /example/data/matrix-comp1.scp:
-------------------------------------------------------------------------------- 1 | mat-1 matrix-comp1.ark:6 2 | mat-2 matrix-comp1.ark:50 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp2.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp2.ark -------------------------------------------------------------------------------- /example/data/matrix-comp2.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp2.ark:6 2 | mat-2 matrix-comp2.ark:73 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp3.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp3.ark -------------------------------------------------------------------------------- /example/data/matrix-comp3.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp3.ark:6 2 | mat-2 matrix-comp3.ark:50 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp4.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp4.ark -------------------------------------------------------------------------------- /example/data/matrix-comp4.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp4.ark:6 2 | mat-2 matrix-comp4.ark:50 3 | -------------------------------------------------------------------------------- 
/example/data/matrix-comp5.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp5.ark -------------------------------------------------------------------------------- /example/data/matrix-comp5.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp5.ark:6 2 | mat-2 matrix-comp5.ark:42 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp6.ark: -------------------------------------------------------------------------------- 1 | mat-1 BCM3 Cmat-2 BCM3 C -------------------------------------------------------------------------------- /example/data/matrix-comp6.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp6.ark:6 2 | mat-2 matrix-comp6.ark:42 3 | -------------------------------------------------------------------------------- /example/data/matrix-comp7.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix-comp7.ark -------------------------------------------------------------------------------- /example/data/matrix-comp7.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp7.ark:6 2 | mat-2 matrix-comp7.ark:42 3 | -------------------------------------------------------------------------------- /example/data/matrix.ark.txt: -------------------------------------------------------------------------------- 1 | mat-1 [ 2 | 1.0 2.0 3.0 4.0 3 | 2.0 3.0 4.0 5.0 ] 4 | mat-2 [ 5 | 7.0 7.0 7.0 7.0 6 | 2.0 3.0 4.0 5.0 ] 7 | -------------------------------------------------------------------------------- 
/example/data/matrix.nocompress.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/matrix.nocompress.ark -------------------------------------------------------------------------------- /example/data/matrix.nocompress.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix.nocompress.ark:6 2 | mat-2 matrix.nocompress.ark:59 3 | -------------------------------------------------------------------------------- /example/data/matrix.scp: -------------------------------------------------------------------------------- 1 | mat-1 matrix-comp1.ark:6 2 | mat-2 matrix-comp1.ark:50 3 | mat-1 matrix-comp2.ark:6 4 | mat-2 matrix-comp2.ark:73 5 | mat-1 matrix-comp3.ark:6 6 | mat-2 matrix-comp3.ark:50 7 | mat-1 matrix-comp4.ark:6 8 | mat-2 matrix-comp4.ark:50 9 | mat-1 matrix-comp5.ark:6 10 | mat-2 matrix-comp5.ark:42 11 | mat-1 matrix-comp6.ark:6 12 | mat-2 matrix-comp6.ark:42 13 | mat-1 matrix-comp7.ark:6 14 | mat-2 matrix-comp7.ark:42 15 | mat-1 matrix.nocompress.ark:6 16 | mat-2 matrix.nocompress.ark:59 17 | -------------------------------------------------------------------------------- /example/data/post.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/t13m/kaldi-readers-for-tensorflow/6c079098fec0376444b6fa51bc16602c24b70821/example/data/post.ark -------------------------------------------------------------------------------- /example/data/post.ark.txt: -------------------------------------------------------------------------------- 1 | post-1 [ 25 1 ] [ 70 1 ] [ 3013 1 ] [ 4 1 ] [ 0 1 ] [ 222 1 ] [ 444 1 ] [ 111 1 ] 2 | post-2 [ 1 1 ] [ 2 1 ] [ 3 1 ] [ 4 1 ] [ 5 1 ] [ 6 1 ] [ 7 1 ] [ 8 1 ] -------------------------------------------------------------------------------- /example/data/post.scp: 
-------------------------------------------------------------------------------- 1 | post-1 post.ark:7 2 | post-2 post.ark:141 3 | -------------------------------------------------------------------------------- /example/data/ref.txt: -------------------------------------------------------------------------------- 1 | comp1 2 | mat-1 [ 3 | 1 2.000015 2.999969 3.999985 4 | 2.000015 2.999969 3.999985 5 ] 5 | mat-2 [ 6 | 7 7 7 7 7 | 2 3 4 5 ] 8 | 9 | comp2 10 | mat-1 [ 11 | 1 2.000015 2.999969 3.999985 12 | 2.000015 3 4 5 ] 13 | mat-2 [ 14 | 7 7 7 7 15 | 2 3 4 5 ] 16 | 17 | comp3 18 | mat-1 [ 19 | 1 2.000015 2.999969 3.999985 20 | 2.000015 2.999969 3.999985 5 ] 21 | mat-2 [ 22 | 7 7 7 7 23 | 2 3 4 5 ] 24 | 25 | comp4 26 | mat-1 [ 27 | 1 2 3 4 28 | 2 3 4 5 ] 29 | mat-2 [ 30 | 7 7 7 7 31 | 2 3 4 5 ] 32 | 33 | comp5 34 | mat-1 [ 35 | 1 2.003922 2.992157 3.996078 36 | 2.003922 2.992157 3.996078 5 ] 37 | mat-2 [ 38 | 7 7 7 7 39 | 2 3 4 5 ] 40 | 41 | comp6 42 | mat-1 [ 43 | 1 2 3 4 44 | 2 3 4 5 ] 45 | mat-2 [ 46 | 7 7 7 7 47 | 2 3 4 5 ] 48 | 49 | comp7 50 | mat-1 [ 51 | 1 1 1 1 52 | 1 1 1 1 ] 53 | mat-2 [ 54 | 1 1 1 1 55 | 1 1 1 1 ] 56 | 57 | nocomp 58 | mat-1 [ 59 | 1 2 3 4 60 | 2 3 4 5 ] 61 | mat-2 [ 62 | 7 7 7 7 63 | 2 3 4 5 ] 64 | -------------------------------------------------------------------------------- /example/read-ali.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | kaldi_module = tf.load_op_library("../cmake-build-release/libkaldi_readers.so") 3 | 4 | def main(): 5 | value_rspecific = "./data/ali.ark:6" 6 | rspec = tf.constant(value_rspecific, tf.string) 7 | ali_raw_value = kaldi_module.read_kaldi_post_and_ali(rspec, is_reading_post=False) 8 | ali_value = kaldi_module.decode_kaldi_ali(ali_raw_value, tf.int32, is_reading_post=False, merge=False) 9 | ali_value.set_shape([None]) 10 | sess = tf.Session() 11 | sess.run(tf.global_variables_initializer()) 12 | ali = sess.run(ali_value) 13 | 
print(ali.shape) 14 | print(ali) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /example/read-compressed-matrix.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | kaldi_module = tf.load_op_library("../cmake-build-release/libkaldi_readers.so") 3 | 4 | def main(): 5 | value_rspecific = "./data/matrix.compressed.ark:6" 6 | rspec = tf.constant(value_rspecific, tf.string) 7 | #feats_raw_value = kaldi_module.read_kaldi_matrix(rspec) 8 | #feats_value = kaldi_module.decode_kaldi_matrix(feats_raw_value, tf.float32) 9 | feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=0, right_padding=0) 10 | feats_value.set_shape([None, 4]) 11 | sess = tf.Session() 12 | sess.run(tf.global_variables_initializer()) 13 | feats = sess.run(feats_value) 14 | print(feats.shape) 15 | print(feats) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /example/read-matrix.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | kaldi_module = tf.load_op_library("../../cmake-build-debug/libkaldi_readers.so") 4 | 5 | def main(): 6 | if len(sys.argv) != 2: 7 | print("Usage: python read-matrix.py /path/to/filename.scp") 8 | return 1 9 | scpfile = sys.argv[1] 10 | with open(scpfile) as fin: 11 | scplist = fin.readlines() 12 | scplist = [scpitem.strip().split()[1] for scpitem in scplist] 13 | value_rspecific = "./data/matrix.nocompress.ark:59" 14 | rspec = tf.placeholder(tf.string) 15 | feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=3, right_padding=4) 16 | #feats_value.set_shape([None, 4]) 17 | feats_value.set_shape([None, None]) 18 | sess = tf.Session() 19 | sess.run(tf.global_variables_initializer()) 20 | for rspec_value in scplist: 
21 | feats = sess.run(feats_value, feed_dict={rspec: rspec_value}) 22 | print(rspec_value) 23 | print(feats.shape) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /example/read-post.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | kaldi_module = tf.load_op_library("../build/libkaldi_readers.so") 3 | 4 | def main(): 5 | value_rspecific = "./data/post.ark:141" 6 | rspec = tf.constant(value_rspecific, tf.string) 7 | ali_raw_value = kaldi_module.read_kaldi_post_and_ali(rspec, is_reading_post=True) 8 | ali_value = kaldi_module.decode_kaldi_ali(ali_raw_value, tf.int32, is_reading_post=True) 9 | ali_value.set_shape([None]) 10 | sess = tf.Session() 11 | sess.run(tf.global_variables_initializer()) 12 | ali = sess.run(ali_value) 13 | print(ali.shape) 14 | print(ali) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /example/read-uncompressed-matrix.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | kaldi_module = tf.load_op_library("../build/libkaldi_readers.so") 3 | 4 | def main(): 5 | value_rspecific = "./data/matrix.nocompress.ark:59" 6 | rspec = tf.constant(value_rspecific, tf.string) 7 | feats_value = kaldi_module.read_and_decode_kaldi_matrix(rspec, left_padding=3, right_padding=4) 8 | feats_value.set_shape([None, 4]) 9 | sess = tf.Session() 10 | sess.run(tf.global_variables_initializer()) 11 | feats = sess.run(feats_value) 12 | print(feats.shape) 13 | print(feats) 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /kaldi-ali.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
"tensorflow/core/framework/reader_base.h" 4 | #include "tensorflow/core/framework/reader_op_kernel.h" 5 | #include "tensorflow/core/lib/core/errors.h" 6 | #include "tensorflow/core/lib/io/buffered_inputstream.h" 7 | #include "tensorflow/core/lib/io/random_inputstream.h" 8 | #include "tensorflow/core/lib/io/zlib_compression_options.h" 9 | #include "tensorflow/core/lib/io/zlib_inputstream.h" 10 | #include "tensorflow/core/lib/strings/strcat.h" 11 | #include "tensorflow/core/platform/env.h" 12 | 13 | #include "shape-funcs.hh" 14 | 15 | namespace tensorflow { 16 | using shape_util::ScalarInputsAndOutputs; 17 | using shape_util::TwoElementOutput; 18 | 19 | static Status ReadKaldiPostAndAli(Env* env, const string& ark_path, uint64 ark_offset, bool is_reading_post, string* contents) { 20 | enum { kBufferSize = 256 << 10 /* 256 kB */ }; 21 | 22 | std::unique_ptr file_; 23 | std::unique_ptr buffered_inputstream_; 24 | 25 | TF_RETURN_IF_ERROR(env->NewRandomAccessFile(ark_path, &file_)); 26 | buffered_inputstream_.reset( 27 | new io::BufferedInputStream(file_.get(), kBufferSize)); 28 | TF_RETURN_IF_ERROR(buffered_inputstream_->SkipNBytes(ark_offset)); 29 | 30 | // Actural reading start from here 31 | string binary; 32 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(2, &binary)); 33 | CHECK_EQ(binary[0], '\0'); 34 | CHECK_EQ(binary[1], 'B'); 35 | string header_buffer; 36 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(1, &header_buffer)); 37 | if (header_buffer[0] == '\4') { 38 | // This is a vector of int 39 | string size_str; 40 | buffered_inputstream_->ReadNBytes(4, &size_str); 41 | int32 size = *reinterpret_cast(size_str.data()); 42 | string data; 43 | if (is_reading_post) { 44 | for (int32 outer_vec_idx = 0; outer_vec_idx < size; outer_vec_idx++) { 45 | // <1> <4> [<1> <4> <1> <4>] [<1> <4> <1> <4>] 46 | string inner_size_str; 47 | buffered_inputstream_->ReadNBytes(5, &inner_size_str); 48 | int32 inner_size = *reinterpret_cast(inner_size_str.data() + 1); 49 
| string inner_vec_data; 50 | buffered_inputstream_->ReadNBytes(inner_size * 10, &inner_vec_data); 51 | data += inner_size_str + inner_vec_data; 52 | } 53 | } else { 54 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(size * 5, &data)); 55 | } 56 | *contents = header_buffer + size_str + data; 57 | } else { 58 | return Status(error::UNAVAILABLE, "Unknown Kaldi Post or Ali: " + header_buffer); 59 | } 60 | return Status::OK(); 61 | } 62 | 63 | class ReadKaldiPostAndAliOp : public OpKernel { 64 | public: 65 | using OpKernel::OpKernel; 66 | explicit ReadKaldiPostAndAliOp(OpKernelConstruction *context) 67 | :OpKernel(context), 68 | id_pat_("^(\\S+):(\\d+)") 69 | { 70 | OP_REQUIRES_OK(context, context->GetAttr("is_reading_post", &is_reading_post_)); 71 | } 72 | void Compute(OpKernelContext* context) override { 73 | 74 | const Tensor* input; 75 | 76 | OP_REQUIRES_OK(context, context->input("scpline", &input)); 77 | OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()), 78 | errors::InvalidArgument( 79 | "Input filename tensor must be scalar, but had shape: ", 80 | input->shape().DebugString())); 81 | 82 | Tensor* output = nullptr; 83 | OP_REQUIRES_OK(context, context->allocate_output("contents", 84 | TensorShape({}), &output)); 85 | const std::regex id_pat("^(\\S+):(\\d+)"); 86 | std::smatch m; 87 | string half_scp_line = input->scalar()(); 88 | bool matched = std::regex_search(half_scp_line, m, id_pat); 89 | OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line)); 90 | string ark_path = m[1]; 91 | string ark_offset_str = m[2]; 92 | uint64 ark_offset = std::stoull(ark_offset_str); 93 | 94 | OP_REQUIRES_OK(context, 95 | ReadKaldiPostAndAli(context->env(), ark_path, ark_offset, is_reading_post_, 96 | &output->scalar()())); 97 | } 98 | private: 99 | bool is_reading_post_; 100 | const std::regex id_pat_; 101 | }; 102 | REGISTER_KERNEL_BUILDER(Name("ReadKaldiPostAndAli").Device(DEVICE_CPU), ReadKaldiPostAndAliOp); 103 | 
104 | REGISTER_OP("ReadKaldiPostAndAli") 105 | .Attr("is_reading_post: bool") 106 | .Input("scpline: string") 107 | .Output("contents: string") 108 | .SetShapeFn(ScalarInputsAndOutputs) 109 | .Doc(R"doc( 110 | Reads and outputs the entire contents of the input kaldi post or ali ark filename. 111 | 112 | scpline: scalar. /path/to/ark.file:12345 113 | )doc"); 114 | 115 | class DecodeKaldiAliOp : public OpKernel { 116 | public: 117 | explicit DecodeKaldiAliOp(OpKernelConstruction* context) : OpKernel(context) { 118 | OP_REQUIRES_OK(context, context->GetAttr("out_type", &out_type_)); 119 | OP_REQUIRES_OK(context, context->GetAttr("is_reading_post", &is_reading_post_)); 120 | OP_REQUIRES_OK(context, context->GetAttr("merge", &merge_)); 121 | } 122 | 123 | void Compute(OpKernelContext* context) override { 124 | const auto& input = context->input(0); 125 | int64 str_size = -1; 126 | auto flat_in = input.flat(); 127 | OP_REQUIRES(context, flat_in.size() == 1, 128 | errors::InvalidArgument( 129 | "DecodeKaldiAliOp requires input string size = 1" 130 | ) 131 | ); 132 | const string& in_str = flat_in(0); 133 | str_size = in_str.size(); 134 | 135 | const char* in_data = reinterpret_cast(flat_in(0).data()); 136 | TensorShape out_shape; 137 | int32 num_elem = *reinterpret_cast(in_data + 1); 138 | if (!merge_) { 139 | out_shape.AddDim(num_elem); 140 | } else { 141 | int32 prev_elem = -1; 142 | int32 count = 0; 143 | const char* p = in_data + 5; 144 | for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx ++) { 145 | int32 curr_elem; 146 | if (is_reading_post_) { 147 | curr_elem = *reinterpret_cast(p + 5 + 1); 148 | p += 15; 149 | } else { 150 | curr_elem = *reinterpret_cast(p + 1); 151 | p += 5; 152 | } 153 | if (curr_elem != prev_elem) { 154 | count ++; 155 | prev_elem = curr_elem; 156 | } 157 | } 158 | out_shape.AddDim(count); 159 | } 160 | 161 | if (str_size == -1 || str_size == 0) { // Empty input 162 | Tensor* output_tensor = nullptr; 163 | OP_REQUIRES_OK(context, 
context->allocate_output("output", out_shape, 164 | &output_tensor)); 165 | return; 166 | } 167 | 168 | Tensor* output_tensor = nullptr; 169 | OP_REQUIRES_OK( 170 | context, context->allocate_output("output", out_shape, &output_tensor)); 171 | auto out = output_tensor->flat(); 172 | 173 | int32* out_data = out.data(); 174 | const char* in_bytes = in_data + 5; 175 | if (!merge_) { 176 | if (is_reading_post_) { 177 | int32 prev_elem = -1; 178 | for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) { 179 | out_data[frame_idx] = *reinterpret_cast(in_bytes + 5 + 1); 180 | in_bytes += 15; 181 | } 182 | } else { 183 | for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) { 184 | out_data[frame_idx] = *reinterpret_cast(in_bytes + 1); 185 | in_bytes += 5; 186 | } 187 | } 188 | } else { 189 | int32 prev_elem = -1; 190 | int32 count = 0; 191 | for (int32 frame_idx = 0; frame_idx < num_elem; frame_idx++) { 192 | int32 curr_elem; 193 | if (is_reading_post_) { 194 | curr_elem = *reinterpret_cast(in_bytes + 5 + 1); 195 | in_bytes += 15; 196 | } else { 197 | curr_elem = *reinterpret_cast(in_bytes + 1); 198 | in_bytes += 5; 199 | } 200 | if (curr_elem != prev_elem) { 201 | out_data[count] = curr_elem; 202 | count ++; 203 | prev_elem = curr_elem; 204 | } 205 | } 206 | } 207 | } 208 | 209 | private: 210 | bool is_reading_post_; 211 | bool merge_; 212 | DataType out_type_; 213 | 214 | }; 215 | 216 | REGISTER_KERNEL_BUILDER(Name("DecodeKaldiAli").Device(DEVICE_CPU), DecodeKaldiAliOp); 217 | 218 | REGISTER_OP("DecodeKaldiAli") 219 | .Input("bytes: string") 220 | .Output("output: out_type") 221 | .Attr("out_type: {int32}") 222 | .Attr("is_reading_post: bool") 223 | .Attr("merge: bool") 224 | .SetShapeFn(shape_inference::UnknownShape) 225 | .Doc(R"doc( 226 | Reinterpret the bytes of a string as a kaldi ali 227 | )doc"); 228 | 229 | 230 | } // namespace tensorflow 231 | -------------------------------------------------------------------------------- /kaldi-matrix-direct.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tensorflow/core/framework/reader_base.h" 4 | #include "tensorflow/core/framework/reader_op_kernel.h" 5 | #include "tensorflow/core/lib/core/errors.h" 6 | #include "tensorflow/core/lib/io/buffered_inputstream.h" 7 | #include "tensorflow/core/lib/io/random_inputstream.h" 8 | #include "tensorflow/core/lib/strings/strcat.h" 9 | #include "tensorflow/core/platform/env.h" 10 | #include "tensorflow/core/framework/common_shape_fns.h" 11 | #include "shape-funcs.hh" 12 | 13 | namespace tensorflow { 14 | using shape_util::ScalarInputsAndOutputs; 15 | using shape_util::TwoElementOutput; 16 | 17 | class ReadAndDecodeKaldiMatrixOp : public OpKernel { 18 | public: 19 | 20 | using OpKernel::OpKernel; 21 | 22 | explicit ReadAndDecodeKaldiMatrixOp(OpKernelConstruction* context): OpKernel(context) { 23 | OP_REQUIRES_OK(context, context->GetAttr("left_padding", &left_padding_)); 24 | OP_REQUIRES_OK(context, context->GetAttr("right_padding", &right_padding_)); 25 | } 26 | void Compute(OpKernelContext* context) override { 27 | 28 | const Tensor* input; 29 | OP_REQUIRES_OK(context, context->input("scpline", &input)); 30 | OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()), 31 | errors::InvalidArgument( 32 | "Input filename tensor must be scalar, but had shape: ", 33 | input->shape().DebugString())); 34 | 35 | const std::regex id_pat("^(\\S+):(\\d+)"); 36 | std::smatch m; 37 | string half_scp_line = input->scalar()(); 38 | bool matched = std::regex_search(half_scp_line, m, id_pat); 39 | OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line)); 40 | string ark_path = m[1]; 41 | string ark_offset_str = m[2]; 42 | uint64 ark_offset = std::stoull(ark_offset_str); 43 | 44 | std::unique_ptr file; 45 | OP_REQUIRES_OK(context, context->env()->NewRandomAccessFile(ark_path, &file)); 46 | uint64 rel_offset = 0; 47 | StringPiece 
data_holder; 48 | char data_header[10]; 49 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, 2, &data_holder, data_header)); 50 | rel_offset += 2; 51 | bool is_binary = (data_header[0] == '\0' && data_header[1] == 'B'); 52 | OP_REQUIRES(context, is_binary, Status(error::INVALID_ARGUMENT, 53 | "We only support binary format ark.")); 54 | 55 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, 3, &data_holder, data_header)); 56 | rel_offset += 3; 57 | 58 | TensorShape out_shape; 59 | 60 | if (data_holder == "FM ") { 61 | int8 row_nbyte; 62 | int32 row; 63 | int8 col_nbyte; 64 | int32 col; 65 | OP_REQUIRES_OK(context, 66 | file->Read(ark_offset + rel_offset, 1, &data_holder, 67 | reinterpret_cast(&row_nbyte))); 68 | rel_offset += 1; 69 | OP_REQUIRES_OK(context, 70 | file->Read(ark_offset + rel_offset, 4, &data_holder, 71 | reinterpret_cast(&row))); 72 | rel_offset += 4; 73 | OP_REQUIRES_OK(context, 74 | file->Read(ark_offset + rel_offset, 1, &data_holder, 75 | reinterpret_cast(&col_nbyte))); 76 | rel_offset += 1; 77 | OP_REQUIRES_OK(context, 78 | file->Read(ark_offset + rel_offset, 4, &data_holder, 79 | reinterpret_cast(&col))); 80 | rel_offset += 4; 81 | 82 | out_shape.AddDim(left_padding_ + row + right_padding_); 83 | out_shape.AddDim(col); 84 | Tensor* output_tensor = nullptr; 85 | OP_REQUIRES_OK( 86 | context, context->allocate_output("output", out_shape, &output_tensor)); 87 | auto out = output_tensor->flat(); 88 | 89 | float* out_data = out.data(); 90 | OP_REQUIRES_OK(context, 91 | file->Read(ark_offset + rel_offset, row * col * sizeof(float), &data_holder, 92 | reinterpret_cast(out_data + left_padding_ * col))); 93 | 94 | for (int64 i = 0; i < left_padding_; i ++) { 95 | for (int j = 0; j < col; j ++) { 96 | *(out_data + i * col + j) = *(out_data + left_padding_ * col + j); 97 | } 98 | } 99 | for (int64 i = left_padding_ + row; i < left_padding_ + row + right_padding_; i ++) { 100 | for (int j = 0; j < col; j ++) { 101 | *(out_data + i * 
col + j) = *(out_data + (left_padding_ + row - 1) * col + j); 102 | } 103 | } 104 | 105 | } else if (data_holder == "CM ") { 106 | GlobalHeader h; 107 | h.format = 1; 108 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder, 109 | reinterpret_cast(&h) + 4)); 110 | rel_offset += (sizeof(h) - 4); 111 | out_shape.AddDim(left_padding_ + h.num_rows + right_padding_); 112 | out_shape.AddDim(h.num_cols); 113 | Tensor* output_tensor = nullptr; 114 | OP_REQUIRES_OK( 115 | context, context->allocate_output("output", out_shape, &output_tensor)); 116 | auto out = output_tensor->flat(); 117 | 118 | uint64 remaining_size = h.num_cols * (h.num_rows + sizeof(PerColHeader)); 119 | string compressed_buffer; 120 | compressed_buffer.resize(remaining_size); 121 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder, 122 | &compressed_buffer[0])); 123 | rel_offset += remaining_size; 124 | 125 | float* out_data = out.data(); 126 | const char* in_data = compressed_buffer.data(); 127 | 128 | const PerColHeader *per_col_header = reinterpret_cast(in_data); 129 | const uint8 *in_data_bytes = reinterpret_cast(per_col_header + h.num_cols); 130 | for (int64 i = 0; i < h.num_cols; i++, per_col_header++) { 131 | float p0 = Uint16ToFloat(h, per_col_header->percentile_0), 132 | p25 = Uint16ToFloat(h, per_col_header->percentile_25), 133 | p75 = Uint16ToFloat(h, per_col_header->percentile_75), 134 | p100 = Uint16ToFloat(h, per_col_header->percentile_100); 135 | 136 | for (int64 j = left_padding_; j < left_padding_ + h.num_rows; j ++, in_data_bytes ++) { 137 | float f = CharToFloat(p0, p25, p75, p100, *in_data_bytes); 138 | *(out_data + j * h.num_cols + i) = f; 139 | } 140 | } 141 | 142 | for (int64 i = 0; i < left_padding_; i ++) { 143 | for (int j = 0; j < h.num_cols; j ++) { 144 | *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j); 145 | } 146 | } 147 | for (int64 i = left_padding_ + h.num_rows; i < 
left_padding_ + h.num_rows + right_padding_; i ++) { 148 | for (int j = 0; j < h.num_cols; j ++) { 149 | *(out_data + i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j); 150 | } 151 | } 152 | } else if (data_holder == "CM2") { 153 | rel_offset ++; 154 | GlobalHeader h; 155 | h.format = 2; 156 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder, 157 | reinterpret_cast(&h) + 4)); 158 | rel_offset += (sizeof(h) - 4); 159 | out_shape.AddDim(left_padding_ + h.num_rows + right_padding_); 160 | out_shape.AddDim(h.num_cols); 161 | Tensor* output_tensor = nullptr; 162 | OP_REQUIRES_OK( 163 | context, context->allocate_output("output", out_shape, &output_tensor)); 164 | auto out = output_tensor->flat(); 165 | 166 | uint64 size = DataSize(h); 167 | uint64 remaining_size = size - sizeof(GlobalHeader); 168 | string compressed_buffer; 169 | compressed_buffer.resize(remaining_size); 170 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder, 171 | &compressed_buffer[0])); 172 | rel_offset += remaining_size; 173 | 174 | float* out_data = out.data(); 175 | const char* in_data = compressed_buffer.data(); 176 | 177 | const uint16 *in_data_uint16 = reinterpret_cast(in_data); 178 | float min_value = h.min_value; 179 | float increment = h.range * (1.0 / 65535.0); 180 | for (int64 i = left_padding_; i < left_padding_ + h.num_rows; i++) { 181 | for (int64 j = 0; j < h.num_cols; j++) { 182 | *(out_data + i * h.num_cols + j) = min_value + in_data_uint16[j] * increment; 183 | } 184 | in_data_uint16 += h.num_cols; 185 | } 186 | for (int64 i = 0; i < left_padding_; i ++) { 187 | for (int j = 0; j < h.num_cols; j ++) { 188 | *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j); 189 | } 190 | } 191 | for (int64 i = left_padding_ + h.num_rows; i < left_padding_ + h.num_rows + right_padding_; i ++) { 192 | for (int j = 0; j < h.num_cols; j ++) { 193 | *(out_data + 
i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j); 194 | } 195 | } 196 | } else if (data_holder == "CM3") { 197 | rel_offset ++; 198 | GlobalHeader h; 199 | h.format = 3; 200 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, sizeof(h) - 4, &data_holder, 201 | reinterpret_cast(&h) + 4)); 202 | rel_offset += (sizeof(h) - 4); 203 | out_shape.AddDim(left_padding_ + h.num_rows + right_padding_); 204 | out_shape.AddDim(h.num_cols); 205 | Tensor* output_tensor = nullptr; 206 | OP_REQUIRES_OK( 207 | context, context->allocate_output("output", out_shape, &output_tensor)); 208 | auto out = output_tensor->flat(); 209 | 210 | uint64 size = DataSize(h); 211 | uint64 remaining_size = size - sizeof(GlobalHeader); 212 | string compressed_buffer; 213 | compressed_buffer.resize(remaining_size); 214 | OP_REQUIRES_OK(context, file->Read(ark_offset + rel_offset, remaining_size, &data_holder, 215 | &compressed_buffer[0])); 216 | rel_offset += remaining_size; 217 | 218 | float* out_data = out.data(); 219 | const char* in_data = compressed_buffer.data(); 220 | 221 | float min_value = h.min_value, increment = h.range * (1.0 / 255.0); 222 | const uint8 *in_data_bytes = reinterpret_cast(in_data); 223 | for (int64 i = left_padding_; i < left_padding_ + h.num_rows; i++) { 224 | for (int64 j = 0; j < h.num_cols; j ++) { 225 | *(out_data + i * h.num_cols + j) = h.min_value + in_data_bytes[j] * increment; 226 | } 227 | in_data_bytes += h.num_cols; 228 | } 229 | for (int64 i = 0; i < left_padding_; i ++) { 230 | for (int j = 0; j < h.num_cols; j ++) { 231 | *(out_data + i * h.num_cols + j) = *(out_data + left_padding_ * h.num_cols + j); 232 | } 233 | } 234 | for (int64 i = left_padding_ + h.num_rows; i < left_padding_ + h.num_rows + right_padding_; i ++) { 235 | for (int j = 0; j < h.num_cols; j ++) { 236 | *(out_data + i * h.num_cols + j) = *(out_data + (left_padding_ + h.num_rows - 1) * h.num_cols + j); 237 | } 238 | } 239 | } else { 240 | 
OP_REQUIRES_OK(context, Status(error::UNAVAILABLE, 241 | "Unknown Kaldi Matrix:" + data_holder.ToString() + 242 | " When reading \"" + half_scp_line + "\"" + 243 | " Ark: " + ark_path + 244 | " OFFSET: " + std::to_string(ark_offset) )); 245 | } 246 | } 247 | private: 248 | int64 left_padding_, right_padding_; 249 | enum DataFormat { 250 | kOneByteWithColHeaders = 1, 251 | kTwoByte = 2, 252 | kOneByte = 3 253 | }; 254 | struct GlobalHeader { 255 | int32 format; // Represents the enum DataFormat. 256 | float min_value; // min_value and range represent the ranges of the integer 257 | // data in the kTwoByte and kOneByte formats, and the 258 | // range of the PerColHeader uint16's in the 259 | // kOneByteWithColheaders format. 260 | float range; 261 | int32 num_rows; 262 | int32 num_cols; 263 | }; 264 | struct PerColHeader { 265 | uint16 percentile_0; 266 | uint16 percentile_25; 267 | uint16 percentile_75; 268 | uint16 percentile_100; 269 | }; 270 | float Uint16ToFloat(const GlobalHeader &global_header, uint16 value) { 271 | return global_header.min_value 272 | + global_header.range * 1.52590218966964e-05F * value; 273 | } 274 | float CharToFloat(float p0, float p25, float p75, float p100, 275 | uint8 value) { 276 | if (value <= 64) { 277 | return p0 + (p25 - p0) * value * (1/64.0f); 278 | } else if (value <= 192) { 279 | return p25 + (p75 - p25) * (value - 64) * (1/128.0f); 280 | } else { 281 | return p75 + (p100 - p75) * (value - 192) * (1/63.0f); 282 | } 283 | } 284 | uint64 DataSize(const GlobalHeader& header) { 285 | DataFormat format = static_cast(header.format); 286 | if (format == kOneByteWithColHeaders) { 287 | return sizeof(GlobalHeader) + 288 | header.num_cols * (sizeof(PerColHeader) + header.num_rows); 289 | } else if (format == kTwoByte) { 290 | return sizeof(GlobalHeader) + 291 | 2 * header.num_rows * header.num_cols; 292 | } else { 293 | return sizeof(GlobalHeader) + 294 | header.num_rows * header.num_cols; 295 | } 296 | } 297 | }; 298 | 
REGISTER_KERNEL_BUILDER(Name("ReadAndDecodeKaldiMatrix").Device(DEVICE_CPU), ReadAndDecodeKaldiMatrixOp); 299 | 300 | 301 | REGISTER_OP("ReadAndDecodeKaldiMatrix") 302 | .Input("scpline: string") 303 | .Attr("left_padding: int") 304 | .Attr("right_padding: int") 305 | .Output("output: float32") 306 | .SetShapeFn(shape_inference::UnknownShape) 307 | .Doc(R"doc( 308 | Reinterpret the bytes of a string as a kaldi matrix 309 | )doc"); 310 | } 311 | -------------------------------------------------------------------------------- /kaldi-matrix.cc: -------------------------------------------------------------------------------- 1 | /* Reference: 2 | * */ 3 | 4 | #include 5 | #include 6 | #include "tensorflow/core/framework/reader_base.h" 7 | #include "tensorflow/core/framework/reader_op_kernel.h" 8 | #include "tensorflow/core/lib/core/errors.h" 9 | #include "tensorflow/core/lib/io/buffered_inputstream.h" 10 | #include "tensorflow/core/lib/io/random_inputstream.h" 11 | #include "tensorflow/core/lib/strings/strcat.h" 12 | #include "tensorflow/core/platform/env.h" 13 | #include "tensorflow/core/framework/common_shape_fns.h" 14 | #include "shape-funcs.hh" 15 | 16 | namespace tensorflow { 17 | using shape_util::ScalarInputsAndOutputs; 18 | using shape_util::TwoElementOutput; 19 | 20 | static Status ReadKaldiMatrix(Env* env, const string& ark_path, uint64 ark_offset, string* contents) { 21 | std::unique_ptr file_; 22 | std::unique_ptr buffered_inputstream_; 23 | enum { kBufferSize = 256 << 10 /* 256 kB */ }; 24 | 25 | TF_RETURN_IF_ERROR(env->NewRandomAccessFile(ark_path, &file_)); 26 | buffered_inputstream_.reset( 27 | new io::BufferedInputStream(file_.get(), kBufferSize)); 28 | TF_RETURN_IF_ERROR(buffered_inputstream_->SkipNBytes(ark_offset)); 29 | 30 | // Actural reading start from here 31 | string binary; 32 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(2, &binary)); 33 | CHECK_EQ(binary[0], '\0'); 34 | CHECK_EQ(binary[1], 'B'); 35 | string header_buffer; 36 | 
TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(3, &header_buffer)); 37 | if (header_buffer == "CM ") { 38 | // format 1 39 | 40 | // Reading global_header 41 | string global_header; 42 | uint64 global_header_sz = 4 * 4; 43 | uint64 per_col_header_sz = 2 * 4; 44 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(global_header_sz, &global_header)); 45 | int32 format = 1; 46 | float min_value, range; 47 | int32 num_rows, num_cols; 48 | memcpy(&min_value, global_header.data()+ 4 * 0, sizeof(float)); 49 | memcpy(&range, global_header.data() + 4 * 1, sizeof(float)); 50 | memcpy(&num_rows, global_header.data() + 4 * 2, sizeof(int32)); 51 | memcpy(&num_cols, global_header.data() + 4 * 3, sizeof(int32)); 52 | 53 | // Calculate record size 54 | uint64 size = global_header_sz + num_cols * (per_col_header_sz + num_rows); 55 | uint64 remaining_size = size - global_header_sz; 56 | string data; 57 | TF_RETURN_IF_ERROR(buffered_inputstream_->ReadNBytes(remaining_size, &data)); 58 | *contents = header_buffer + global_header + data; 59 | } else if (header_buffer == "DM ") { 60 | return Status(error::UNAVAILABLE, "Kaldi Matrix of double reading is not implemented yet."); 61 | } else if (header_buffer == "FM "){ 62 | string row_and_col; 63 | buffered_inputstream_->ReadNBytes(1+4+1+4, &row_and_col); 64 | int32 row, col; 65 | memcpy(&row, row_and_col.data()+1, sizeof(int32)); 66 | memcpy(&col, row_and_col.data()+6, sizeof(int32)); 67 | string data; 68 | buffered_inputstream_->ReadNBytes(row * col * sizeof(float), &data); 69 | *contents = header_buffer + row_and_col + data; 70 | } else { 71 | return Status(error::UNAVAILABLE, "Unknown Kaldi Matrix: " + header_buffer); 72 | } 73 | return Status::OK(); 74 | } 75 | 76 | 77 | class ReadKaldiMatrixOp : public OpKernel { 78 | public: 79 | using OpKernel::OpKernel; 80 | void Compute(OpKernelContext* context) override { 81 | 82 | const Tensor* input; 83 | OP_REQUIRES_OK(context, context->input("scpline", &input)); 84 | 
OP_REQUIRES(context, TensorShapeUtils::IsScalar(input->shape()), 85 | errors::InvalidArgument( 86 | "Input filename tensor must be scalar, but had shape: ", 87 | input->shape().DebugString())); 88 | 89 | Tensor* output = nullptr; 90 | OP_REQUIRES_OK(context, context->allocate_output("contents", 91 | TensorShape({}), &output)); 92 | const std::regex id_pat("^(\\S+):(\\d+)"); 93 | std::smatch m; 94 | string half_scp_line = input->scalar()(); 95 | bool matched = std::regex_search(half_scp_line, m, id_pat); 96 | OP_REQUIRES(context, matched, Status(error::INVALID_ARGUMENT, "Script line is " + half_scp_line)); 97 | string ark_path = m[1]; 98 | string ark_offset_str = m[2]; 99 | uint64 ark_offset = std::stoull(ark_offset_str); 100 | 101 | OP_REQUIRES_OK(context, 102 | ReadKaldiMatrix(context->env(), ark_path, ark_offset, 103 | &output->scalar()())); 104 | } 105 | }; 106 | REGISTER_KERNEL_BUILDER(Name("ReadKaldiMatrix").Device(DEVICE_CPU), ReadKaldiMatrixOp); 107 | 108 | REGISTER_OP("ReadKaldiMatrix") 109 | .Input("scpline: string") 110 | .Output("contents: string") 111 | .SetShapeFn(ScalarInputsAndOutputs) 112 | .Doc(R"doc( 113 | Reads and outputs the contents of a record of the input kaldi ark filename. 114 | 115 | scpline: scalar. 
/path/to/ark.file:12345 116 | )doc"); 117 | 118 | class DecodeKaldiMatrixOp : public OpKernel { 119 | public: 120 | explicit DecodeKaldiMatrixOp(OpKernelConstruction* context) : OpKernel(context) { 121 | OP_REQUIRES_OK(context, context->GetAttr("out_type", &out_type_)); 122 | } 123 | 124 | void Compute(OpKernelContext* context) override { 125 | const auto& input = context->input(0); 126 | int64 str_size = -1; 127 | auto flat_in = input.flat(); 128 | OP_REQUIRES(context, flat_in.size() == 1, 129 | errors::InvalidArgument( 130 | "DecodeKaldiArk requires input string size = 1" 131 | ) 132 | ); 133 | const string& in_str = flat_in(0); 134 | str_size = in_str.size(); 135 | 136 | const char* in_data = reinterpret_cast(flat_in(0).data()); 137 | TensorShape out_shape; 138 | int32 num_elem = 0; 139 | if (in_data[0] == 'C' && in_data[1] == 'M') { 140 | float min_value = *reinterpret_cast(in_data + 3 + 4*0); 141 | float range = *reinterpret_cast(in_data + 3 + 4*1); 142 | int32 num_rows = *reinterpret_cast(in_data + 3 + 4*2); 143 | int32 num_cols = *reinterpret_cast(in_data + 3 + 4*3); 144 | out_shape.AddDim(num_rows); 145 | out_shape.AddDim(num_cols); 146 | num_elem = num_rows * num_cols; 147 | } else if (in_data[0] == 'F' && in_data[1] == 'M') { 148 | int32 num_rows = *reinterpret_cast(in_data + 3 + 1); 149 | int32 num_cols = *reinterpret_cast(in_data + 3 + 1 + 4 + 1); 150 | out_shape.AddDim(num_rows); 151 | out_shape.AddDim(num_cols); 152 | num_elem = num_rows * num_cols; 153 | } 154 | if (str_size == -1 || str_size == 0) { // Empty input 155 | Tensor* output_tensor = nullptr; 156 | OP_REQUIRES_OK(context, context->allocate_output("output", out_shape, 157 | &output_tensor)); 158 | return; 159 | } 160 | 161 | Tensor* output_tensor = nullptr; 162 | OP_REQUIRES_OK( 163 | context, context->allocate_output("output", out_shape, &output_tensor)); 164 | auto out = output_tensor->flat(); 165 | 166 | float* out_data = out.data(); 167 | if (in_data[0] == 'C' && in_data[1] == 'M') { 
168 | GlobalHeader header; 169 | header.format = 1; 170 | header.min_value = *reinterpret_cast(in_data + 3 + 4*0); 171 | header.range = *reinterpret_cast(in_data + 3 + 4*1); 172 | header.num_rows = *reinterpret_cast(in_data + 3 + 4*2); 173 | header.num_cols = *reinterpret_cast(in_data + 3 + 4*3); 174 | const PerColHeader *per_col_header = reinterpret_cast(in_data + 3 + 4*4); 175 | const uint8* in_data_bytes = reinterpret_cast(per_col_header + header.num_cols); 176 | 177 | for (int32 i = 0; i < header.num_cols; i++, per_col_header++) { 178 | float p0 = Uint16ToFloat(header, per_col_header->percentile_0), 179 | p25 = Uint16ToFloat(header, per_col_header->percentile_25), 180 | p75 = Uint16ToFloat(header, per_col_header->percentile_75), 181 | p100 = Uint16ToFloat(header, per_col_header->percentile_100); 182 | 183 | for (int32 j = 0; j < header.num_rows; j ++, in_data_bytes ++) { 184 | float f = CharToFloat(p0, p25, p75, p100, *in_data_bytes); 185 | *(out_data + j * header.num_cols + i) = f; 186 | } 187 | } 188 | } else if (in_data[0] == 'F' && in_data[1] == 'M') { 189 | memcpy(out_data, in_data + 3 + 10, num_elem * sizeof(float)); 190 | } 191 | } 192 | 193 | private: 194 | DataType out_type_; 195 | struct PerColHeader { 196 | uint16 percentile_0; 197 | uint16 percentile_25; 198 | uint16 percentile_75; 199 | uint16 percentile_100; 200 | }; 201 | struct GlobalHeader { 202 | int32 format; 203 | float min_value; 204 | float range; 205 | int32 num_rows; 206 | int32 num_cols; 207 | }; 208 | float Uint16ToFloat(const GlobalHeader &global_header, uint16 value) { 209 | return global_header.min_value 210 | + global_header.range * 1.52590218966964e-05F * value; 211 | } 212 | float CharToFloat(float p0, float p25, float p75, float p100, 213 | uint8 value) { 214 | if (value <= 64) { 215 | return p0 + (p25 - p0) * value * (1/64.0f); 216 | } else if (value <= 192) { 217 | return p25 + (p75 - p25) * (value - 64) * (1/128.0f); 218 | } else { 219 | return p75 + (p100 - p75) * (value - 
192) * (1/63.0f); 220 | } 221 | } 222 | }; 223 | 224 | REGISTER_KERNEL_BUILDER(Name("DecodeKaldiMatrix").Device(DEVICE_CPU), DecodeKaldiMatrixOp); 225 | 226 | 227 | REGISTER_OP("DecodeKaldiMatrix") 228 | .Input("bytes: string") 229 | .Output("output: out_type") 230 | .Attr("out_type: {float}") 231 | .SetShapeFn(shape_inference::UnknownShape) 232 | .Doc(R"doc( 233 | Reinterpret the bytes of a string as a kaldi matrix 234 | )doc"); 235 | } // namespace tensorflow 236 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Kaldi ark readers for tensorflow 2 | 3 | ## Introduction 4 | 5 | This project aims to enable reading kaldi ark files into tensorflow. It adds 6 | following operators to tensorflow: 7 | 8 | * read_kaldi_matrix(rspecific) 9 | * decode_kaldi_matrix(data, element-type) 10 | * read_kaldi_post_and_ali(rspecific, is_reading_post(bool)) 11 | * decode_kaldi_ali(data, element-type, is_reading_post(bool)) 12 | 13 | For kaldi matrix, only float matrix are supported. Please pass "tf.float32" in 14 | through the element-type argument. 15 | 16 | For compressed kaldi matrix, only compression method 2 (kSpeechFeature) is supported. 17 | 18 | For alignment, user need to specify weather is reading posteriors or pdfs by the argument 19 | `is_reading_post`. Operator decode_kaldi_ali produces alignment pdfs output, in format 20 | of a one dimension int32 tensor (a int32 tensor). Please pass "tf.int32" in through the 21 | element-type argument. 22 | 23 | Contributions are welcome. Feel free to fork and send pull request or to create issues. 24 | 25 | ## Build 26 | 27 | Prerequisitions: 28 | 29 | * Linux 30 | * GCC version > 5.1 (for use of c++11 regex) 31 | * python with tensorflow installed 32 | 33 | Steps: 34 | 35 | 1. git clone 36 | 2. cd kaldi-reader-standalone 37 | 3. mkdir build && cd build 38 | 4. cmake .. 
-DPYTHONBIN=/path/to/your/correct/version/of/python 39 | 5. make 40 | 41 | Then you are all set. 42 | 43 | ## Usage example 44 | 45 | ```python 46 | kaldi_module = kaldi_module = tf.load_op_library("/path/to/this/project/libkaldi_readers.so") 47 | feats_raw_value = kaldi_module.read_kaldi_matrix("/path/to/somearks/file1.ark:2321") 48 | feats_value = kaldi_module.decode_kaldi_matrix(feats_raw_value, tf.float32) 49 | feats_value.set_shape([None, num_dim]) 50 | ``` 51 | 52 | There are some examples under the `example` directory. To run them, please modify the library path (in contents of the 53 | python files) to the correct path. 54 | 55 | 1. cd example 56 | 2. python read-compressed-matrix.py 57 | 3. python read-uncompressed-matrix.py 58 | 4. python read-post.py 59 | 5. python read-ali.py 60 | 61 | ## Author 62 | 63 | Fan Ziye 64 | 65 | ## Reference 66 | 67 | Kaldi: https://github.com/kaldi-asr/kaldi 68 | Tensorflow: https://www.tensorflow.org/extend/adding_an_op -------------------------------------------------------------------------------- /shape-funcs.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "shape-funcs.hh" 3 | #include "tensorflow/core/lib/core/errors.h" 4 | #include "tensorflow/core/platform/env.h" 5 | #include "tensorflow/core/framework/common_shape_fns.h" 6 | 7 | namespace shape_util { 8 | using tensorflow::shape_inference::DimensionHandle; 9 | using tensorflow::shape_inference::InferenceContext; 10 | using tensorflow::shape_inference::ShapeHandle; 11 | 12 | tensorflow::Status ScalarInputsAndOutputs(InferenceContext *c) { 13 | ShapeHandle unused; 14 | for (int i = 0; i < c->num_inputs(); ++i) { 15 | TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); 16 | } 17 | for (int i = 0; i < c->num_outputs(); ++i) { 18 | c->set_output(i, c->Scalar()); 19 | } 20 | return tensorflow::Status::OK(); 21 | } 22 | 23 | tensorflow::Status TwoElementOutput(InferenceContext *c) { 24 | c->set_output(0, 
c->Vector(2)); 25 | return tensorflow::Status::OK(); 26 | } 27 | } // namespace shape_util -------------------------------------------------------------------------------- /shape-funcs.hh: -------------------------------------------------------------------------------- 1 | // 2 | // Created by zyfan on 12/3/17. 3 | // 4 | 5 | #ifndef KALDI_READER_STANDALONE_SHAPE_FUNCS_HH 6 | #define KALDI_READER_STANDALONE_SHAPE_FUNCS_HH 7 | #include "tensorflow/core/platform/env.h" 8 | #include "tensorflow/core/framework/common_shape_fns.h" 9 | 10 | namespace shape_util { 11 | using tensorflow::shape_inference::InferenceContext; 12 | 13 | tensorflow::Status ScalarInputsAndOutputs(InferenceContext *c); 14 | 15 | tensorflow::Status TwoElementOutput(InferenceContext *c); 16 | } // namespace shape_util 17 | 18 | 19 | #endif //KALDI_READER_STANDALONE_SHAPE_FUNCS_HH 20 | --------------------------------------------------------------------------------