├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── kaldi_argparse └── __init__.py ├── kaldi_io ├── Makefile ├── __init__.py ├── bp_converters.h ├── kaldi_io_internal.cpp └── python_wrappers.h ├── scripts ├── apply-global-cmvn.py ├── compute-global-cmvn-stats.py ├── copy-feats-padded.py └── show-wav-ali.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | .depend.mk 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | 51 | # Sphinx documentation 52 | docs/_build/ 53 | 54 | # PyBuilder 55 | target/ 56 | 57 | # Temp files 58 | *~ 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRCDIR = kaldi_io 2 | 3 | ifndef KALDI_ROOT 4 | $(error please set KALDI_ROOT to point ot the base of the kaldi installation) 5 | endif 6 | 7 | .PHONY: all 8 | 9 | all: 10 | $(MAKE) -C $(SRCDIR) depend 11 | $(MAKE) -C $(SRCDIR) 12 | 13 | clean: 14 | $(MAKE) -C $(SRCDIR) clean 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This is a set of Python wrappers for Kaldi input-output classes. 
def _str_to_bool(text):
    """Convert a command-line boolean spelling to a real ``bool``.

    ``type=bool`` cannot be used for this: ``bool('false')`` is ``True``
    because every non-empty string is truthy.

    :param text: option value as written on the command line or in a config file
    :return: ``True`` for true/t/yes/y/1; ``False`` for false/f/no/n/0 and for
        the empty string (comparison is case-insensitive)
    :raises argparse.ArgumentTypeError: for any other spelling
    """
    lowered = text.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (text,))


def _read_config_args(path):
    """Read a config file and return its options, one list entry per line.

    Everything after a ``#`` is dropped and surrounding whitespace is stripped.
    Lines that end up empty (blank or comment-only lines) are skipped so that
    they are not handed to argparse as empty-string arguments.

    :param path: path of the configuration file
    :return: list of option strings in file order
    :raises IOError: if the file cannot be opened
    """
    with open(path, 'r') as conf:
        stripped = (line.split('#')[0].strip() for line in conf)
        return [option for option in stripped if option]


class AddConfig(argparse.Action):
    """argparse action that parses the options stored in the named file."""

    def __call__(self, parser, namespace, values, option_string=None):
        # ``values`` is the config file path; its options are parsed into the
        # namespace that is currently being filled.
        parser.parse_args(args=_read_config_args(values), namespace=namespace)


class KaldiArgumentParser(argparse.ArgumentParser):
    """ArgumentParser with Kaldi-like conventions.

    * ``--config FILE`` / ``--config=FILE`` is expanded in place into the
      options listed in FILE (one per line, ``#`` starts a comment).
    * ``parse_args`` echoes the command line to stderr unless
      ``--print-args=false`` is given.

    The automatic ``-h`` option is disabled in the constructor; call
    :meth:`add_standard_arguments` to register help, version, ``--print-args``
    and ``--config``.
    """

    def __init__(self, *args, **kwargs):
        """Accept the ArgumentParser arguments plus an optional ``version``."""
        kwargs['add_help'] = False
        # setdefault (instead of a hard-coded keyword) lets callers supply
        # their own formatter without a duplicate-keyword TypeError.
        kwargs.setdefault('formatter_class',
                          argparse.ArgumentDefaultsHelpFormatter)
        version = kwargs.pop('version', None)
        super(KaldiArgumentParser, self).__init__(*args, **kwargs)
        self.version = version

    def add_standard_arguments(self):
        """Register the options shared by all tools in a 'Standard options' group."""
        grp = self.add_argument_group('Standard options')

        default_prefix = '-'
        grp.add_argument(
            default_prefix + 'h', default_prefix * 2 + 'help',
            action='help', default=argparse.SUPPRESS,
            help=argparse._('show this help message and exit'))
        if self.version:
            grp.add_argument(
                default_prefix + 'v', default_prefix * 2 + 'version',
                action='version', default=argparse.SUPPRESS,
                version=self.version,
                help=argparse._("show program's version number and exit"))
        grp.add_argument('--print-args', type=_str_to_bool, default=True,
                         help='Print the command line arguments (to stderr)')
        # Registered so that it shows up in --help; the option itself is
        # consumed by parse_known_args before argparse sees it.
        grp.add_argument('--config', default=argparse.SUPPRESS,
                         help='Configuration file with options')

    def parse_known_args(self, args=None, namespace=None):
        """Expand ``--config`` options, then defer to ArgumentParser.

        :param args: argument list, defaults to ``sys.argv[1:]``
        :param namespace: optional namespace to populate
        :return: ``(namespace, remaining_args)`` as for ArgumentParser
        """
        if args is None:
            args = sys.argv[1:]

        expanded_args = []
        expecting_conf_path = False
        for arg in args:
            if expecting_conf_path:
                expecting_conf_path = False
                conf_path = arg
            elif arg == '--config':
                # The file name is the next argument.
                expecting_conf_path = True
                continue
            elif arg.startswith('--config='):
                conf_path = arg[len('--config='):].strip()
            else:
                # Includes options that merely start with "--config".
                expanded_args.append(arg)
                continue
            if conf_path:  # an empty file name is ignored
                expanded_args.extend(_read_config_args(conf_path))

        if expecting_conf_path:
            self.error('argument --config: expected a file name')
        return argparse.ArgumentParser.parse_known_args(
            self, args=expanded_args, namespace=namespace)

    def parse_args(self, args=None, namespace=None):
        """Parse the arguments and optionally echo the command line to stderr.

        :return: the populated namespace
        """
        parsed = argparse.ArgumentParser.parse_args(
            self, args=args, namespace=namespace)
        # print_args is only present when add_standard_arguments() was called.
        if getattr(parsed, 'print_args', False):
            # sys.stderr.write works on both Python 2 and Python 3.
            sys.stderr.write('%s %s\n' % (os.path.basename(sys.argv[0]),
                                          ' '.join(sys.argv[1:])))
        return parsed
-------------------------------------------------------------------------------- 1 | 2 | all: 3 | EXTRA_CXXFLAGS = -Wno-sign-compare 4 | 5 | KALDI_SRC = $(KALDI_ROOT)/src 6 | 7 | include $(KALDI_SRC)/kaldi.mk 8 | 9 | BINFILES = 10 | 11 | 12 | OBJFILES = 13 | 14 | 15 | ADDLIBS = $(KALDI_SRC)/lm/kaldi-lm.a $(KALDI_SRC)/decoder/kaldi-decoder.a $(KALDI_SRC)/lat/kaldi-lat.a \ 16 | $(KALDI_SRC)/hmm/kaldi-hmm.a $(KALDI_SRC)/transform/kaldi-transform.a $(KALDI_SRC)/gmm/kaldi-gmm.a \ 17 | $(KALDI_SRC)/tree/kaldi-tree.a $(KALDI_SRC)/util/kaldi-util.a $(KALDI_SRC)/matrix/kaldi-matrix.a \ 18 | $(KALDI_SRC)/base/kaldi-base.a 19 | 20 | TESTFILES = 21 | 22 | LDFLAGS += -L$(shell python -c "import distutils.sysconfig; print(distutils.sysconfig.get_config_var('LIBDIR'))") 23 | PYLIB = $(shell python-config --libs) 24 | PYINC = $(shell python-config --includes) 25 | NPINC = -I$(shell python -c 'import numpy; print(numpy.get_include())') 26 | 27 | PYLIBS = kaldi_io_internal.so 28 | 29 | #include $(KALDI_SRC)/makefiles/default_rules.mk 30 | 31 | %.so: %.cpp 32 | g++ -shared -o $@ -Wall -fPIC -I$(KALDI_SRC) $(PYINC) $(NPINC) $(CXXFLAGS) $< $(ADDLIBS) $(LDFLAGS) $(PYLIB) $(LOADLIBES) $(LDLIBS) -lpython2.7 -lboost_python -lboost_system 33 | 34 | clean: 35 | -rm -f *.o *.a *.so $(TESTFILES) $(BINFILES) $(TESTOUTPUTS) tmp* *.tmp 36 | 37 | depend: 38 | -$(CXX) -I$(KALDI_SRC) $(PYINC) $(NPINC) -M $(CXXFLAGS) *.cpp > .depend.mk 39 | 40 | test: 41 | 42 | all: $(PYLIBS) 43 | -------------------------------------------------------------------------------- /kaldi_io/__init__.py: -------------------------------------------------------------------------------- 1 | """Python Wrappers for Kaldi table IO (:kaldi:`io.html`) 2 | 3 | In Kaldi the archive does not carry information about its contents and the user is required to 4 | use the proper Reader or Writer. This module follows this approach and provides wrappers for 5 | RandomAccess and Sequential readers, and for the Writers. 
The classes are instantiated for 6 | each Kaldi type. 7 | 8 | Internally, the wrappers define holders (:kaldi:`io.html#io_sec_holders`) for python types 9 | and instantiate the regular Kaldi templates. In this way, the wrappers are 100% compatible with 10 | Kaldi and support using pipes and subprograms for inputs and outputs. 11 | 12 | The Python readers and writers implement the context api, and are fully usable with the Python 13 | `with` construct. 14 | 15 | Examples: 16 | A matrix to text converter: 17 | 18 | .. code-block:: python 19 | 20 | with kaldi_io.SequentialBaseFloatMatrixReader('ark:mat.ark') as reader: 21 | for name,mat in reader: 22 | print name, mat 23 | 24 | A simple vector generator: 25 | 26 | .. code-block:: python 27 | 28 | with kaldi_io.Int32VectorWriter('ark:| gzip -c > vec.ark.gz') as w: 29 | for len in xrange(10): 30 | vec = [len] * len 31 | w['vec_%d' %(len,)] = vec 32 | 33 | Kaldi Reader classes 34 | ==================== 35 | 36 | Kaldi provides two types of reader: the Sequential reader which is akin to an iterator and the 37 | Random Access reader which is akin to a dict. Both work with piped data, thus the random access 38 | readers may be required to read and store objects in memory until the proper one is found. More 39 | information is in :kaldi:`io.html#io_sec_bloat`. 40 | 41 | Kaldi programs typically open one Sequential reader (e.g. for the features) and several RandomAccess 42 | readers. For each feature, the random access readers would be used to fetch auxiliary information, while 43 | ensuring that they pertain to the same utterance. This resembles a merge-sort merge phase and works well 44 | if all the files are properly sorted. Citing :kaldi:`data_prep.html#data_prep_data_yourself`: 45 | 46 | .. note:: 47 | 48 | All of these files should be sorted. If they are not sorted, you will get errors when you run the scripts. In The Table concept we explain why this is needed.
It has to do with the I/O framework; the ultimate reason for the sorting is to enable something equivalent to random-access lookup on a stream that doesn't support fseek(), such as a piped command. Many Kaldi programs are reading multiple pipes from other Kaldi commands, reading different types of object, and are doing something roughly comparable to merge-sort on the different inputs; merge-sort, of course, requires that the inputs be sorted. Be careful when you sort that you have the shell variable LC_ALL defined as "C", for example (in bash), 49 | 50 | export LC_ALL=C 51 | 52 | If you don't do this, the files will be sorted in an order that's different from how C++ sorts strings, and Kaldi will crash. You have been warned! 53 | 54 | .. py:class:: DataTypeSequentialReader(rx_specifier) 55 | 56 | The SequentialReader mostly resembles a Python iterator. Therefore it implements the 57 | Iterator protocol: 58 | 59 | .. py:method:: __iter__() 60 | 61 | Returns self 62 | 63 | .. py:method:: next() 64 | 65 | :return: a tuple of: 66 | 67 | * key (string) 68 | * value (type is determined by the reader class) 69 | 70 | Moreover it provides a method to check whether the iterator is empty: 71 | 72 | .. py:method:: done() 73 | 74 | Returns `True` if the iterator is empty 75 | 76 | Kaldi uses a slightly different iteration protocol, which can be accessed using the functions: 77 | 78 | .. py:method:: _kaldi_next() 79 | 80 | Advance the iterator by one value 81 | 82 | .. py:method:: _kaldi_key() 83 | 84 | Returns the key of the current value 85 | 86 | .. py:method:: _kaldi_value() 87 | 88 | Returns the current value (i.e. the value that will be returned on the next call 89 | to :func:`next`) 90 | 91 | For resource management the classes implement: 92 | 93 | .. py:method:: close() 94 | 95 | Closes the reader. 96 | 97 | .. py:method:: is_open() 98 | 99 | Returns `True` if the reader is opened and can be read from 100 | 101 | .. py:method:: __enter__() 102 | ..
py:method:: __exit__() 103 | 104 | Implement the `with` context protocol 105 | 106 | 107 | .. py:class:: DataTypeRandomAccessReader(rx_specifier) 108 | 109 | The random access reader resembles a Python dict - values are retrieved for a given key value. 110 | Therefore the reader acts in a dict-like manner: 111 | 112 | .. py:method:: __contains__(key) 113 | .. py:method:: has_key(key) 114 | 115 | Returns `True` if key is present in reader. Enables the use of the `in` operator. 116 | 117 | .. py:method:: __getitem__(key) 118 | .. py:method:: value(key) 119 | 120 | Returns the value associated with key 121 | 122 | For resource management the classes implement: 123 | 124 | .. py:method:: close() 125 | 126 | Closes the reader. 127 | 128 | .. py:method:: is_open() 129 | 130 | Returns `True` if the reader is opened and can be read from 131 | 132 | .. py:method:: __enter__() 133 | .. py:method:: __exit__() 134 | 135 | Implement the `with` context protocol 136 | 137 | .. py:class:: DataTypeRandomAccessReaderMapped(data_rx_specifier, mapping_rx_specifier) 138 | This class implements a random access reader whose keys have been mapped using the mapper. 139 | See :kaldi:`io.html#io_sec_mapped` for more explanation 140 | 141 | Kaldi Writer class 142 | ================== 143 | 144 | The writer stores key-value pairs and thus resembles a dict. However, unlike a dict 145 | no checks for key duplication are made. The writer will happily store all values using 146 | the same key, which may render them unusable. For best cooperation with Kaldi, the keys 147 | should be written sorted in the `C order`. 148 | 149 | 150 | .. py:class:: DataTypeWriter(wx_specifier) 151 | .. py:method:: write(key, value) 152 | .. py:method:: __setitem__(key,value) 153 | 154 | Append to the file the value under key 155 | 156 | .. py:method:: flush() 157 | 158 | Flush the output stream. 159 | 160 | For resource management the classes implement: 161 | 162 | .. py:method:: close() 163 | 164 | Closes the writer.
165 | 166 | .. py:method:: is_open() 167 | 168 | Returns `True` if the writer is opened and can be written to 169 | 170 | .. py:method:: __enter__() 171 | .. py:method:: __exit__() 172 | 173 | Implement the `with` context protocol 174 | 175 | Transformed Readers 176 | =================== 177 | 178 | Very often the value read into Python would need to be further converted. The classes 179 | `TransRA` and `TransSeq` take an appropriate reader and a function that will be used to 180 | transform all objects returned. 181 | 182 | 183 | Mapping between Kaldi and Python Objects 184 | ======================================== 185 | 186 | The readers and writers are named after the Kaldi type they access. 187 | 188 | +--------------------+---------------------+-----------------------+-----------------------+ 189 | | Kaldi Type | Read Python Type | Writable Python Types | Notes | 190 | | | | | | 191 | +====================+=====================+=======================+=======================+ 192 | |Matrix |NDArray of |Any Python object |BaseFloat is mapped to | 193 | | |appropriate |convertible to an |either float32 (c's | 194 | | |DTYPE. Float32 and |NDarray |float) or float64 (c's | 195 | | |Float64 are used for | |double) based on Kaldi | 196 | | |float and double, | |compile options | 197 | | |respectively. | | | 198 | +--------------------+---------------------+-----------------------+-----------------------+ 199 | |Vector |1-dimensional NDarray|Any Python object |Same as for Matrix | 200 | | |of appropriate type.
|convertible to 1d | | 201 | | | |NDarray of appropriate | | 202 | | | |type | | 203 | | | | | | 204 | | | | | | 205 | +--------------------+---------------------+-----------------------+-----------------------+ 206 | |std vector |1-dimensional NDarray|any python iterable | | 207 | | |of int32 | | | 208 | | | | | | 209 | | | | | | 210 | | | | | | 211 | | | | | | 212 | +--------------------+---------------------+-----------------------+-----------------------+ 213 | |std::vector> | |convertible to 1d | | 215 | | | |NDarrays | | 216 | | | | | | 217 | | | | | | 218 | | | | | | 219 | +--------------------+---------------------+-----------------------+-----------------------+ 220 | |std:: |tuple of ints |tuple of ints | | 221 | |pair | | | | 222 | | | | | | 223 | | | | | | 224 | | | | | | 225 | | | | | | 226 | +--------------------+---------------------+-----------------------+-----------------------+ 227 | | | Any Python object | Any Python object |Uses repr/eval in text | 228 | | | | |mode and cPickle in | 229 | | | | |binary mode | 230 | | | | | | 231 | | | | | | 232 | | | | | | 233 | +--------------------+---------------------+-----------------------+-----------------------+ 234 | 235 | 236 | Created on Jul 31, 2014 237 | 238 | @author: chorows 239 | """ 240 | 241 | 242 | import numpy as np 243 | from kaldi_io_internal import * 244 | 245 | if KALDI_BASE_FLOAT()==np.float64: 246 | RandomAccessBaseFloatMatrixReader = RandomAccessFloat64MatrixReader 247 | RandomAccessBaseFloatMatrixMapped = RandomAccessFloat64MatrixMapped 248 | SequentialBaseFloatMatrixReader = SequentialFloat64MatrixReader 249 | BaseFloatMatrixWriter = Float64MatrixWriter 250 | 251 | RandomAccessBaseFloatVectorReader = RandomAccessFloat64VectorReader 252 | RandomAccessBaseFloatVectorReaderMapped = RandomAccessFloat64VectorReaderMapped 253 | SequentialBaseFloatVectorReader = SequentialFloat64VectorReader 254 | BaseFloatVectorWriter = Float64VectorWriter 255 | 256 | if KALDI_BASE_FLOAT()==np.float32: 
def get_io_for_dtype(access, dtype, element=''):
    '''
    Get a writer or reader class for the given dtype. eg:
        get_io_for_dtype('Sequential', np.float32, 'MatrixReader')
        get_io_for_dtype('float32', 'MatrixWriter')

    With two arguments they are read as (dtype, element), which is the form
    used for writers, whose class names carry no access prefix.

    :param access: class name prefix, e.g. 'Sequential' or 'RandomAccess'
    :param dtype: anything numpy accepts as an int32, float32 or float64
        dtype (scalar type, ``np.dtype`` instance such as ``array.dtype``,
        or dtype name)
    :param element: class name suffix, e.g. 'MatrixReader' or 'VectorWriter'
    :return: the object named ``access + DtypeName + element`` in this module
    :raises KeyError: if the dtype is not supported or no such name exists
    '''
    if element == '':  # two-argument form: (dtype, element)
        access, dtype, element = '', access, dtype
    if dtype is None:
        # np.dtype(None) silently means float64; do not guess for the caller.
        raise KeyError(dtype)
    try:
        # Normalising through np.dtype makes np.float32, np.dtype('float32')
        # and 'float32' all resolve to the same name.
        dtype_name = np.dtype(dtype).name
    except (TypeError, ValueError):
        raise KeyError(dtype)
    class_infix = {'int32': 'Int32',
                   'float32': 'Float32',
                   'float64': 'Float64'}[dtype_name]
    return globals()[access + class_infix + element]


class _Transformed(object):
    """Base for reader wrappers that pass returned objects through a function.

    Attributes that the wrapper does not define are looked up on the wrapped
    reader. Special methods are not found that way (Python looks them up on
    the type), so the context-manager protocol is forwarded explicitly.
    """

    def __init__(self, reader, transform_function, **kwargs):
        """
        :param reader: the reader to wrap
        :param transform_function: callable applied to every object returned
        """
        super(_Transformed, self).__init__(**kwargs)
        self.reader = reader
        self.transform_function = transform_function

    def __getattr__(self, attr):
        # Only called when normal lookup fails. If 'reader' itself is missing
        # (instance created without __init__), delegating would re-enter this
        # method forever, so fail with a plain AttributeError instead.
        if attr == 'reader':
            raise AttributeError(attr)
        return getattr(self.reader, attr)

    def __enter__(self):
        self.reader.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return self.reader.__exit__(exc_type, exc_value, traceback)


class TransRA(_Transformed):
    """Random access reader wrapper: looked-up values are transformed."""

    def __init__(self, *args, **kwargs):
        super(TransRA, self).__init__(*args, **kwargs)

    def value(self, key):
        """Return the transformed value stored under key."""
        return self.transform_function(self.reader.value(key))

    def __getitem__(self, key):
        return self.value(key)

    def __contains__(self, key):
        # Without this, `key in obj` would fall back to calling
        # __getitem__(0), __getitem__(1), ... on the wrapper.
        return key in self.reader


class TransSeq(_Transformed):
    """Sequential reader wrapper: every object read is transformed."""

    def __init__(self, *args, **kwargs):
        super(TransSeq, self).__init__(*args, **kwargs)

    def __iter__(self):
        return self

    def next(self):
        """Return the transformed result of the wrapped reader's next()."""
        return self.transform_function(self.reader.next())

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def _kaldi_value(self):
        """Return the transformed result of the wrapped reader's _kaldi_value()."""
        return self.transform_function(self.reader._kaldi_value())
// Grab pointer to memory into which to construct the new std::vector 67 | void* storage = ((boost::python::converter::rvalue_from_python_storage< 68 | std::vector >*) data)->storage.bytes; 69 | 70 | // in-place construct the new std::vector using the character data 71 | // extraced from the python object 72 | std::vector& v = *(new (storage) std::vector()); 73 | 74 | v.insert(v.end(), begin, end); 75 | 76 | // Stash the memory chunk pointer for later use by boost.python 77 | data->convertible = storage; 78 | } 79 | }; 80 | 81 | template 82 | struct MapFromDictBPConverter { 83 | MapFromDictBPConverter() { 84 | boost::python::converter::registry::push_back( 85 | &MapFromDictBPConverter::convertible, 86 | &MapFromDictBPConverter::construct, boost::python::type_id()); 87 | } 88 | 89 | // Determine if obj_ptr can be converted in a std::vector 90 | static void* convertible(PyObject* obj_ptr) { 91 | if (!PyDict_Check(obj_ptr)) { 92 | return 0; 93 | } 94 | return obj_ptr; 95 | } 96 | 97 | // Convert obj_ptr into a std::vector 98 | static void construct( 99 | PyObject* obj_ptr, 100 | boost::python::converter::rvalue_from_python_stage1_data* data) { 101 | 102 | boost::python::dict obj(boost::python::handle<>(boost::python::borrowed(obj_ptr))); 103 | boost::python::list keys = obj.keys(); 104 | 105 | // Grab pointer to memory into which to construct the new std::vector 106 | void* storage = ((boost::python::converter::rvalue_from_python_storage< M >*) data)->storage.bytes; 107 | 108 | M& map = *(new (storage) M()); 109 | 110 | boost::python::stl_input_iterator begin(keys); 111 | boost::python::stl_input_iterator end; 112 | 113 | for (;begin!=end; ++begin) { 114 | const typename M::key_type& k = *begin; 115 | const typename M::mapped_type& v = boost::python::extract(obj[k]); 116 | map[k] = v; 117 | } 118 | 119 | // Stash the memory chunk pointer for later use by boost.python 120 | data->convertible = storage; 121 | } 122 | }; 123 | 124 | 125 | template 126 | struct 
PairToTupleBPConverter { 127 | 128 | static PyObject* convert(std::pair const& p) { 129 | return boost::python::incref(boost::python::make_tuple(p.first, p.second).ptr()); 130 | } 131 | }; 132 | 133 | template 134 | struct PairFromTupleBPConverter { 135 | PairFromTupleBPConverter() { 136 | boost::python::converter::registry::push_back( 137 | &PairFromTupleBPConverter::convertible, 138 | &PairFromTupleBPConverter::construct, boost::python::type_id >()); 139 | } 140 | 141 | // Determine if obj_ptr can be converted in a std::vector 142 | static void* convertible(PyObject* obj_ptr) { 143 | if (!PyTuple_Check(obj_ptr) || PySequence_Length(obj_ptr)!=2) { 144 | return 0; 145 | } 146 | return obj_ptr; 147 | } 148 | 149 | // Convert obj_ptr into a std::vector 150 | static void construct( 151 | PyObject* obj_ptr, 152 | boost::python::converter::rvalue_from_python_stage1_data* data) { 153 | 154 | boost::python::tuple t = boost::python::tuple(boost::python::handle<>(boost::python::borrowed(obj_ptr))); 155 | 156 | // Grab pointer to memory into which to construct the new std::vector 157 | void* storage = ((boost::python::converter::rvalue_from_python_storage< 158 | std::pair >*) data)->storage.bytes; 159 | 160 | // in-place construct the new std::vector using the character data 161 | // extraced from the python object 162 | std::pair& v = *(new (storage) std::pair()); 163 | 164 | v.first=boost::python::extract(t[0]); 165 | v.second=boost::python::extract(t[1]); 166 | 167 | // Stash the memory chunk pointer for later use by boost.python 168 | data->convertible = storage; 169 | } 170 | }; 171 | 172 | 173 | } 174 | 175 | #endif /* BP_CONVERTERS_H_ */ 176 | -------------------------------------------------------------------------------- /kaldi_io/kaldi_io_internal.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * kaldi-io.cpp 3 | * 4 | * Created on: Jul 29, 2014 5 | * Author: chorows 6 | */ 7 | 8 | extern "C" { 9 | #include 
"Python.h" 10 | #include "numpy/arrayobject.h" 11 | } 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "python_wrappers.h" 27 | #include "bp_converters.h" 28 | 29 | 30 | using namespace std; 31 | 32 | namespace bp = boost::python; 33 | 34 | //keep a copy of the cPickle module in cache 35 | struct PickleWrapper { 36 | PickleWrapper() { 37 | bp::object pickle = bp::import("cPickle"); 38 | loads = pickle.attr("loads"); 39 | dumps = pickle.attr("dumps"); 40 | } 41 | 42 | bp::object loads, dumps; 43 | }; 44 | 45 | // 46 | // Holder for Python objects. 47 | // 48 | // In binary model uses Pickle to dump, the object is written as dump_length, pickled_string 49 | // In text mode uses repr/eval (only single line), which works OK for simple types - lists, tuples, ints, but may fail for large arrays (as repr skips elemets for ndarray). 50 | // 51 | class PyObjectHolder { 52 | public: 53 | typedef bp::object T; 54 | 55 | PyObjectHolder() { 56 | } 57 | 58 | static bool Write(std::ostream &os, bool binary, const T &t) { 59 | kaldi::InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. 60 | try { 61 | if (binary) { //pickle the object 62 | bp::object py_string = PW()->dumps(t,-1); 63 | int len = bp::extract(py_string.attr("__len__")()); 64 | const char* string = bp::extract(py_string); 65 | kaldi::WriteBasicType(os, true, len); 66 | os.write(string, len); 67 | } else { //use repr 68 | PyObject* repr = PyObject_Repr(t.ptr()); 69 | os << PyString_AsString(repr) << '\n'; 70 | Py_DECREF(repr); 71 | } 72 | return os.good(); 73 | 74 | } catch (const std::exception &e) { 75 | KALDI_WARN<< "Exception caught writing Table object. " << e.what(); 76 | return false; // Write failure. 
77 | } 78 | } 79 | 80 | bool Read(std::istream &is) { 81 | bool is_binary; 82 | if (!kaldi::InitKaldiInputStream(is, &is_binary)) { 83 | KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n"; 84 | return false; 85 | } 86 | try { 87 | if (is_binary) { 88 | int len; 89 | kaldi::ReadBasicType(is, true, &len); 90 | std::auto_ptr buf(new char[len]); 91 | is.read(buf.get(), len); 92 | bp::str py_string(buf.get(), len); 93 | t_ = PW()->loads(py_string); 94 | } else { 95 | std::string line; 96 | std::getline(is, line); 97 | bp::str repr(line); 98 | t_ = bp::eval(repr); 99 | } 100 | return true; 101 | } catch (std::exception &e) { 102 | KALDI_WARN << "Exception caught reading Table object. " << e.what(); 103 | return false; 104 | } 105 | } 106 | 107 | static bool IsReadInBinary() {return true;} 108 | 109 | const T &Value() const {return t_;} // if t is a pointer, would return *t_; 110 | 111 | void Clear() {} 112 | 113 | void Swap(PyObjectHolder * another) { 114 | std::swap(t_, another->t_); 115 | } 116 | 117 | bool ExtractRange(const PyObjectHolder &other, const std::string &range) { 118 | KALDI_ERR << "ExtractRange is not defined for this type of holder."; 119 | return false; 120 | } 121 | 122 | ~PyObjectHolder() {} 123 | 124 | private: 125 | KALDI_DISALLOW_COPY_AND_ASSIGN(PyObjectHolder); 126 | T t_; // t_ may alternatively be of type T*. 
127 | static PickleWrapper *PW_; 128 | static PickleWrapper * PW() { 129 | if (!PW_) { 130 | PW_ = new PickleWrapper(); 131 | } 132 | return PW_; 133 | } 134 | }; 135 | 136 | PickleWrapper * PyObjectHolder::PW_ = 0; 137 | 138 | 139 | template 140 | struct MatrixToNdArrayConverter { 141 | typedef kaldi::KaldiObjectHolder > HR; 142 | typedef kaldi::KaldiObjectHolder > HW; 143 | 144 | static inline bp::object kaldi_to_python(const kaldi::Matrix& mat) { 145 | npy_intp dims[2]; 146 | dims[0] = mat.NumRows(); 147 | dims[1] = mat.NumCols(); 148 | int nd = 2; 149 | int arr_type = kaldi::get_np_type(); 150 | PyObject* ao = PyArray_SimpleNew(nd, dims, arr_type); 151 | bp::object arr=bp::object(bp::handle<>( 152 | ao 153 | )); 154 | kaldi::NpWrapperMatrix arr_wrap((PyArrayObject*)arr.ptr()); 155 | arr_wrap.CopyFromMat(mat); 156 | return arr; 157 | } 158 | 159 | static inline kaldi::NpWrapperMatrix* python_to_kaldi(bp::object o) { 160 | PyObject* raw_arr = PyArray_FromAny(o.ptr(),PyArray_DescrFromType(kaldi::get_np_type()), 2, 2, NPY_C_CONTIGUOUS | NPY_FORCECAST, NULL); 161 | //why does this fail: bp::object arr(bp::handle<>(raw_arr)); 162 | bp::object arr=bp::object(bp::handle<>(raw_arr)); 163 | return new kaldi::NpWrapperMatrix((PyArrayObject*)arr.ptr()); 164 | } 165 | }; 166 | 167 | template 168 | struct VectorToNdArrayConverter { 169 | typedef kaldi::KaldiObjectHolder > HR; 170 | typedef kaldi::KaldiObjectHolder > HW; 171 | 172 | static inline bp::object kaldi_to_python(const kaldi::Vector& vec) { 173 | npy_intp dims[1]; 174 | dims[0] = vec.Dim(); 175 | int nd = 1; 176 | 177 | int arr_type = kaldi::get_np_type(); 178 | PyObject* ao = PyArray_SimpleNew(nd, dims, arr_type); 179 | bp::object arr=bp::object(bp::handle<>( 180 | ao 181 | )); 182 | kaldi::NpWrapperVector vec_wrap((PyArrayObject*)arr.ptr()); 183 | vec_wrap.CopyFromVec(vec); 184 | return arr; 185 | } 186 | 187 | static inline kaldi::NpWrapperVector* python_to_kaldi(bp::object o) { 188 | PyObject* raw_arr = 
PyArray_FromAny(o.ptr(),PyArray_DescrFromType(kaldi::get_np_type()), 1, 1, NPY_C_CONTIGUOUS | NPY_FORCECAST, NULL); 189 | //why does this fail: bp::object arr(bp::handle<>(raw_arr)); 190 | bp::object arr=bp::object(bp::handle<>(raw_arr)); 191 | return new kaldi::NpWrapperVector((PyArrayObject*)arr.ptr()); 192 | } 193 | }; 194 | 195 | 196 | 197 | template 198 | struct VectorToNDArrayBPConverter { 199 | static PyObject* convert(std::vector const& vec) { 200 | npy_intp dims[1]; 201 | dims[0] = vec.size(); 202 | int nd = 1; 203 | int arr_type = kaldi::get_np_type(); 204 | PyObject* ao = PyArray_SimpleNew(nd, dims, arr_type); 205 | bp::object arr=bp::object(bp::handle<>( 206 | ao 207 | )); 208 | std::copy(vec.begin(), vec.end(), (T*)PyArray_DATA(ao)); 209 | return bp::incref(arr.ptr()); 210 | } 211 | }; 212 | 213 | 214 | 215 | template 216 | struct BoostPythonconverter { 217 | typedef HW_ HW; 218 | typedef HR_ HR; 219 | 220 | static inline bp::object kaldi_to_python(const Obj& o) { 221 | return bp::object(o); 222 | } 223 | 224 | static inline Obj * python_to_kaldi(bp::object o) { 225 | return new Obj(bp::extract(o)); 226 | } 227 | }; 228 | 229 | template 230 | class PythonToKaldiHolder { 231 | public: 232 | typedef bp::object T; 233 | typedef typename Converter::HR HR; 234 | typedef typename Converter::HW HW; 235 | 236 | PythonToKaldiHolder() : h_() { 237 | } 238 | 239 | static bool Write(std::ostream &os, bool binary, const T &t) { 240 | try { 241 | auto_ptr obj(Converter::python_to_kaldi(t)); 242 | return HW::Write(os, binary, (*obj)); 243 | } catch (std::exception &e) { 244 | KALDI_WARN << "Exception caught reading Table object. 
" << e.what(); 245 | return false; 246 | } 247 | } 248 | 249 | bool Read(std::istream &is) { 250 | if (!h_.Read(is)) 251 | return false; 252 | t_ = Converter::kaldi_to_python(h_.Value()); 253 | return true; 254 | } 255 | 256 | static bool IsReadInBinary() {return true;} 257 | 258 | const T &Value() const {return t_;} // if t is a pointer, would return *t_; 259 | 260 | void Clear() {} 261 | 262 | void Swap(PythonToKaldiHolder * another) { 263 | std::swap(t_, another->t_); 264 | } 265 | 266 | bool ExtractRange(const PythonToKaldiHolder &other, const std::string &range) { 267 | KALDI_ERR << "ExtractRange is not defined for this type of holder."; 268 | return false; 269 | } 270 | 271 | ~PythonToKaldiHolder() {} 272 | 273 | private: 274 | KALDI_DISALLOW_COPY_AND_ASSIGN(PythonToKaldiHolder); 275 | HR h_; 276 | T t_; // t_ may alternatively be of type T*. 277 | }; 278 | 279 | template 280 | struct VectorHolder { 281 | typedef PythonToKaldiHolder, 282 | kaldi::BasicVectorHolder, kaldi::BasicVectorHolder > > type; 283 | 284 | static void register_converters() { 285 | bp::to_python_converter, kaldi::VectorToListBPConverter >(); 286 | kaldi::VectorFromListBPConverter(); 287 | } 288 | }; 289 | 290 | template 291 | struct VectorNDArrayHolder { 292 | typedef PythonToKaldiHolder, 293 | kaldi::BasicVectorHolder, kaldi::BasicVectorHolder > > type; 294 | 295 | static void register_converters() { 296 | bp::to_python_converter, VectorToNDArrayBPConverter >(); 297 | kaldi::VectorFromListBPConverter(); 298 | } 299 | }; 300 | 301 | template 302 | struct VectorVectorHolder { 303 | typedef PythonToKaldiHolder > , 304 | kaldi::BasicVectorVectorHolder, kaldi::BasicVectorVectorHolder > > type; 305 | 306 | static void register_converters() { 307 | bp::to_python_converter >, kaldi::VectorToListBPConverter > >(); 308 | kaldi::VectorFromListBPConverter >(); 309 | } 310 | }; 311 | 312 | template 313 | struct PairVectorHolder { 314 | typedef PythonToKaldiHolder > , 315 | 
kaldi::BasicPairVectorHolder, kaldi::BasicPairVectorHolder > > type; 316 | 317 | static void register_converters() { 318 | //register the pair first 319 | bp::to_python_converter, 320 | kaldi::PairToTupleBPConverter >(); 321 | kaldi::PairFromTupleBPConverter(); 322 | 323 | //next register the pair vector 324 | bp::to_python_converter >, 325 | kaldi::VectorToListBPConverter > >(); 326 | kaldi::VectorFromListBPConverter >(); 327 | } 328 | }; 329 | 330 | template 331 | const T& get_self_ref(const T& t) { 332 | return t; 333 | } 334 | 335 | template 336 | void exit(T& t, const bp::object& type, 337 | const bp::object& value, const bp::object& traceback) { 338 | t.Close(); 339 | } 340 | 341 | template 342 | bp::object sequential_reader_next(T& reader) { 343 | if (!reader.IsOpen() || reader.Done()) { 344 | PyErr_SetString(PyExc_StopIteration, "No more data."); 345 | bp::throw_error_already_set(); 346 | } 347 | //if not done, extract the contents 348 | bp::str key(reader.Key()); 349 | bp::object val(reader.Value()); 350 | //move the reading head, the contents will be read with the next call to next! 
351 | reader.Next(); 352 | return bp::make_tuple(key,val); 353 | } 354 | 355 | template 356 | class RandomAccessWrapper: public bp::class_ { 357 | public: 358 | template 359 | inline RandomAccessWrapper(char const* name, bp::init_base const& i) 360 | : bp::class_(name, i) { 361 | (*this) 362 | .def("close", &Reader::Close) 363 | .def("is_open", &Reader::IsOpen) 364 | .def("__contains__", &Reader::HasKey) 365 | .def("has_key", &Reader::HasKey) 366 | .def("__getitem__", &Reader::Value, 367 | bp::return_value_policy()) 368 | .def("value", &Reader::Value, 369 | bp::return_value_policy()) 370 | .def("__enter__", &get_self_ref, 371 | bp::return_internal_reference<1>()) 372 | .def("__exit__", &exit) 373 | ; 374 | } 375 | }; 376 | 377 | template 378 | class SequentialReaderWrapper: public bp::class_ { 379 | public: 380 | template 381 | inline SequentialReaderWrapper(char const* name, bp::init_base const& i) 382 | : bp::class_(name, i) { 383 | (*this) 384 | .def("close", &Reader::Close) 385 | .def("is_open", &Reader::IsOpen) 386 | .def("__enter__", &get_self_ref, 387 | bp::return_internal_reference<1>()) 388 | .def("__iter__", &get_self_ref, 389 | bp::return_internal_reference<1>()) 390 | .def("next", sequential_reader_next) 391 | .def("__exit__", &exit) 392 | .def("done", &Reader::Done) 393 | .def("_kaldi_value", &Reader::Value, 394 | bp::return_value_policy()) 395 | .def("_kaldi_next", &Reader::Next) 396 | .def("_kaldi_key", &Reader::Key) 397 | ; 398 | } 399 | }; 400 | 401 | template 402 | class WriterWrapper: public bp::class_ { 403 | public: 404 | template 405 | inline WriterWrapper(char const* name, bp::init_base const& i) 406 | : bp::class_(name, i) { 407 | (*this) 408 | .def("close", &Writer::Close) 409 | .def("is_open", &Writer::IsOpen) 410 | .def("flush", &Writer::Flush) 411 | .def("write", &Writer::Write) 412 | .def("__setitem__", &Writer::Write) 413 | .def("__enter__", &get_self_ref, 414 | bp::return_internal_reference<1>()) 415 | .def("__exit__",&exit) 416 | ; 
417 | } 418 | }; 419 | 420 | 421 | PyObject* KALDI_BASE_FLOAT() { 422 | return (PyObject*)PyArray_DescrFromType(kaldi::get_np_type()); 423 | } 424 | 425 | BOOST_PYTHON_MODULE(kaldi_io_internal) 426 | { 427 | import_array(); 428 | 429 | bp::def("KALDI_BASE_FLOAT", &KALDI_BASE_FLOAT); 430 | 431 | //Python objects 432 | RandomAccessWrapper >("RandomAccessPythonReader", bp::init()); 433 | RandomAccessWrapper >("RandomAccessPythonReaderMapped", bp::init()); 434 | SequentialReaderWrapper >("SequentialPythonReader",bp::init()); 435 | WriterWrapper >("PythonWriter", bp::init()); 436 | 437 | //Matrices as NdArrays 438 | RandomAccessWrapper > > >("RandomAccessFloat64MatrixReader", bp::init()); 439 | RandomAccessWrapper > > >("RandomAccessFloat64MatrixMapped",bp::init()); 440 | SequentialReaderWrapper > > >("SequentialFloat64MatrixReader",bp::init()); 441 | WriterWrapper > > >("Float64MatrixWriter", bp::init()); 442 | 443 | RandomAccessWrapper > > >("RandomAccessFloat32MatrixReader", bp::init()); 444 | RandomAccessWrapper > > >("RandomAccessFloat32MatrixMapped",bp::init()); 445 | SequentialReaderWrapper > > >("SequentialFloat32MatrixReader",bp::init()); 446 | WriterWrapper > > >("Float32MatrixWriter", bp::init()); 447 | 448 | //Vectors as NdArrays 449 | RandomAccessWrapper > > >("RandomAccessFloat64VectorReader", bp::init()); 450 | RandomAccessWrapper > > >("RandomAccessFloat64VectorReaderMapped",bp::init()); 451 | SequentialReaderWrapper > > >("SequentialFloat64VectorReader",bp::init()); 452 | WriterWrapper > > >("Float64VectorWriter", bp::init()); 453 | 454 | RandomAccessWrapper > > >("RandomAccessFloat32VectorReader", bp::init()); 455 | RandomAccessWrapper > > >("RandomAccessFloat32VectorReaderMapped",bp::init()); 456 | SequentialReaderWrapper > > >("SequentialFloat32VectorReader",bp::init()); 457 | WriterWrapper > > >("Float32VectorWriter", bp::init()); 458 | 459 | //Integers 460 | RandomAccessWrapper("RandomAccessInt32Reader", bp::init()); 461 | 
SequentialReaderWrapper("SequentialInt32Reader",bp::init()); 462 | WriterWrapper("Int32Writer", bp::init()); 463 | 464 | // std::vector as ndarray 465 | VectorNDArrayHolder::register_converters(); 466 | RandomAccessWrapper::type > >("RandomAccessInt32VectorReader", bp::init()); 467 | SequentialReaderWrapper::type > >("SequentialInt32VectorReader",bp::init()); 468 | WriterWrapper::type > >("Int32VectorWriter", bp::init()); 469 | 470 | // Vector of simple types as lists 471 | // VectorHolder::register_converters(); 472 | // RandomAccessWrapper::type > >("RandomAccessInt32VectorReader", bp::init()); 473 | // SequentialReaderWrapper::type > >("SequentialInt32VectorReader",bp::init()); 474 | // WriterWrapper::type > >("Int32VectorWriter", bp::init()); 475 | 476 | 477 | // std::vector > 478 | VectorVectorHolder::register_converters(); 479 | RandomAccessWrapper::type > >("RandomAccessInt32VectorVectorReader", bp::init()); 480 | SequentialReaderWrapper::type > >("SequentialInt32VectorVectorReader",bp::init()); 481 | WriterWrapper::type > >("Int32VectorVectorWriter", bp::init()); 482 | 483 | // std::vector > 484 | PairVectorHolder::register_converters(); 485 | RandomAccessWrapper::type > >("RandomAccessInt32PairVectorReader", bp::init()); 486 | SequentialReaderWrapper::type > >("SequentialInt32PairVectorReader",bp::init()); 487 | WriterWrapper::type > >("Int32PairVectorWriter", bp::init()); 488 | 489 | } 490 | -------------------------------------------------------------------------------- /kaldi_io/python_wrappers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * python_wrappers.h 3 | * 4 | * Created on: Aug 28, 2014 5 | * Author: chorows 6 | */ 7 | 8 | #ifndef PYTHON_WRAPPERS_H_ 9 | #define PYTHON_WRAPPERS_H_ 10 | 11 | extern "C" { 12 | #include "Python.h" 13 | #include "numpy/arrayobject.h" 14 | } 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 
| #include 27 | #include 28 | 29 | namespace kaldi { 30 | //Helper to get proper np type 31 | template 32 | int get_np_type() { 33 | //BOOST_STATIC_ASSERT_MSG(false, "Call one of the explicitly instantiated templates for float or double."); 34 | KALDI_ERR << "Call one of the explicitly instantiated templates for float or double."; 35 | return -1; 36 | } 37 | 38 | template <> 39 | int get_np_type() { 40 | return NPY_DOUBLE; 41 | } 42 | 43 | template <> 44 | int get_np_type() { 45 | return NPY_FLOAT; 46 | } 47 | 48 | template <> 49 | int get_np_type() { 50 | return NPY_INT32; 51 | } 52 | 53 | template 54 | class NpWrapperMatrix : public kaldi::MatrixBase { 55 | public: 56 | NpWrapperMatrix(PyArrayObject* arr) 57 | : kaldi::MatrixBase(), 58 | arr_(arr) { 59 | if (PyArray_NDIM(arr_)!=2) { 60 | PyErr_SetString(PyExc_TypeError, "Can wrap only matrices (2D arrays)"); 61 | boost::python::throw_error_already_set(); 62 | } 63 | if (PyArray_TYPE(arr)!=get_np_type()) { 64 | PyErr_SetString(PyExc_TypeError, "Wrong array dtype"); 65 | boost::python::throw_error_already_set(); 66 | } 67 | npy_intp* dims = PyArray_DIMS(arr_); 68 | npy_intp* strides = PyArray_STRIDES(arr_); 69 | if (strides[1]!=sizeof(Real)) { 70 | PyErr_SetString(PyExc_TypeError, "Wrong array column stride"); 71 | boost::python::throw_error_already_set(); 72 | } 73 | Py_INCREF(arr_); 74 | //why do we have to use this-> in here?? 
75 | this->data_ = (Real*)PyArray_DATA(arr); 76 | this->num_rows_ = dims[0]; 77 | this->num_cols_ = dims[1]; 78 | this->stride_ = strides[0]/sizeof(Real); 79 | } 80 | 81 | ~NpWrapperMatrix() { 82 | Py_DECREF(arr_); 83 | } 84 | 85 | protected: 86 | PyArrayObject* arr_; 87 | }; 88 | 89 | template 90 | class NpWrapperVector : public kaldi::VectorBase { 91 | public: 92 | NpWrapperVector(PyArrayObject* arr) 93 | : kaldi::VectorBase(), 94 | arr_(arr) { 95 | if (PyArray_NDIM(arr_)!=1) { 96 | PyErr_SetString(PyExc_TypeError, "Can wrap only vectors (1D arrays)"); 97 | boost::python::throw_error_already_set(); 98 | } 99 | if (PyArray_TYPE(arr)!=get_np_type()) { 100 | PyErr_SetString(PyExc_TypeError, "Wrong array dtype"); 101 | boost::python::throw_error_already_set(); 102 | } 103 | npy_intp* dims = PyArray_DIMS(arr_); 104 | npy_intp* strides = PyArray_STRIDES(arr_); 105 | if (strides[0]!=sizeof(Real)) { 106 | PyErr_SetString(PyExc_TypeError, "Wrong array column stride"); 107 | boost::python::throw_error_already_set(); 108 | } 109 | Py_INCREF(arr_); 110 | //why do we have to use this-> in here?? 
111 | this->data_ = (Real*)PyArray_DATA(arr); 112 | this->dim_ = dims[0]; 113 | } 114 | 115 | ~NpWrapperVector() { 116 | Py_DECREF(arr_); 117 | } 118 | 119 | protected: 120 | PyArrayObject* arr_; 121 | }; 122 | 123 | } //namespace kaldi 124 | 125 | 126 | #endif /* PYTHON_WRAPPERS_H_ */ 127 | -------------------------------------------------------------------------------- /scripts/apply-global-cmvn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Created on Aug 6, 2014 5 | 6 | @author: chorows 7 | ''' 8 | 9 | import sys 10 | import os 11 | import logging 12 | import argparse 13 | 14 | import numpy as np 15 | 16 | import kaldi_io 17 | 18 | if __name__ == '__main__': 19 | print >>sys.stderr, os.path.basename(sys.argv[0]), " ".join(sys.argv[1:]) 20 | logging.basicConfig(level=logging.INFO) 21 | 22 | parser = argparse.ArgumentParser(description="""Apply cmvn (cepstral mean and variance normalization). 23 | 24 | If global stats are supplied, normalize by the global stats. Otherwise normalize per-utterance. 
#!/usr/bin/env python

'''
Accumulate global stats for feature normalization: mean and std.

Created on Aug 6, 2014

@author: chorows
'''

import os
import sys
import logging
import argparse

import numpy as np


def accumulate_stats(features):
    '''
    Compute the per-dimension mean and standard deviation over all frames.

    features -- iterable of 2-D arrays, one per utterance, with frames in
                rows and feature dimensions in columns.

    Returns (mean, std) as float64 vectors. Sums are accumulated in
    float64 so that long float32 inputs do not lose precision, and the
    variance is clipped at zero so rounding can never produce a NaN std.

    Raises ValueError when no frames were seen.
    '''
    total = None
    total_sq = None
    num_frames = 0

    for feats in features:
        feats = np.asarray(feats, dtype=np.float64)
        nframes, nfeats = feats.shape
        if total is None:
            total = np.zeros((nfeats,))
            total_sq = np.zeros((nfeats,))
        num_frames += nframes
        total += feats.sum(0)
        total_sq += (feats * feats).sum(0)

    if num_frames == 0:
        raise ValueError("no feature frames were read")

    mean = total / num_frames
    variance = np.maximum(total_sq / num_frames - mean ** 2, 0.0)
    return mean, np.sqrt(variance)


def main():
    # Imported here so that accumulate_stats can be used (and tested)
    # without the compiled kaldi_io extension.
    import kaldi_io

    sys.stderr.write("%s %s\n" % (os.path.basename(sys.argv[0]),
                                  " ".join(sys.argv[1:])))
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='Accumulate global stats for feature normalization: mean and std')
    parser.add_argument('in_rxfilename')
    parser.add_argument('out_wxfilename')
    args = parser.parse_args()

    try:
        with kaldi_io.SequentialBaseFloatMatrixReader(args.in_rxfilename) as reader:
            mean, std = accumulate_stats(feats for _, feats in reader)
    except ValueError as err:
        logging.error("Cannot compute stats from %s: %s",
                      args.in_rxfilename, err)
        sys.exit(1)

    # Cast to Kaldi's BaseFloat only once, at the output boundary.
    base_float = kaldi_io.KALDI_BASE_FLOAT()
    with kaldi_io.BaseFloatVectorWriter(args.out_wxfilename) as w:
        w['mean'] = np.asarray(mean, dtype=base_float)
        w['std'] = np.asarray(std, dtype=base_float)


if __name__ == '__main__':
    main()
[default=0]', default=None) 24 | parser.add_argument('--padding-left', help='With how many frames to pad on the left (defaults to padding if not set)', default=None) 25 | parser.add_argument('--padding-right', help='With how many frames to pad on the right (defaults to padding if not set)', default=None) 26 | parser.add_argument('--padding-mode', default='zero', help='What values to use for padding- zero|copy (edge frames)') 27 | parser.add_argument('--orig-size-wxfilename', help='Where to write the original matrix sizes', default=None) 28 | parser.add_argument('in_rxfilename') 29 | parser.add_argument('out_wxfilename') 30 | args = parser.parse_args() 31 | 32 | if args.padding is not None and args.padding_left is not None and args.padding_right is not None: 33 | logging.error("Can't set padding, padding-left and padding-right at the same time!") 34 | sys.exit(1) 35 | 36 | padding = 0 37 | if args.padding is not None: padding = int(args.padding) 38 | 39 | padding_left = padding 40 | if args.padding_left is not None: padding_left = int(args.padding_left) 41 | 42 | padding_right = padding 43 | if args.padding_right is not None: padding_right = int(args.padding_right) 44 | 45 | if padding_left<0 or padding_right<0: 46 | logging.error("Padding can't be negative!") 47 | sys.exit(1) 48 | 49 | count = 0 50 | logging.info("Padding with %d in the left and %d on the right", padding_left, padding_right) 51 | 52 | #should use with, but if something happens the files will get closed anyways 53 | reader = kaldi_io.SequentialBaseFloatMatrixReader(args.in_rxfilename) 54 | writer = kaldi_io.BaseFloatMatrixWriter(args.out_wxfilename) 55 | 56 | size_writer=None 57 | if args.orig_size_wxfilename is not None: 58 | size_writer = kaldi_io.PythonWriter(args.orig_size_wxfilename) 59 | 60 | for name, value in reader: 61 | count += 1 62 | if padding_left+padding_right==0: 63 | padded = value 64 | else: 65 | num_frames, frame_dim = value.shape 66 | padded = 
#!/usr/bin/env python
'''
Extract an utterance and convert the alignment to an Audacity label file.

Created on Jul 31, 2014

@author: chorows
'''

import sys
import argparse
import shutil
import tempfile
from subprocess import check_call
import os
from os import path

import numpy as np


def alignment_to_labels(ali, dur, phones_dict):
    '''
    Turn an alignment into Audacity label lines.

    ali         -- sequence of (phone_id, length_in_frames) pairs.
    dur         -- utterance duration; frame counts are rescaled so that
                   the last label ends exactly at dur.
    phones_dict -- mapping from phone id to phone symbol.

    Returns a list of 'start end symbol' strings (no trailing newline);
    an empty alignment gives an empty list.
    '''
    ali = np.array(ali, dtype=float)
    if ali.size == 0:
        return []
    lengths = ali[:, 1]
    end_times = np.cumsum(lengths) / lengths.sum() * dur

    labels = []
    last_time = 0.0
    for phone, end_time in zip(ali[:, 0], end_times):
        labels.append('%f %f %s' % (last_time, end_time, phones_dict[phone]))
        last_time = end_time
    return labels


def main():
    # Imported here so that alignment_to_labels can be used (and tested)
    # without the compiled kaldi_io extension.
    import kaldi_io

    parser = argparse.ArgumentParser(description='Extract an utterance and convert the alignment to an Audacity label file')
    parser.add_argument('wav', help='wav script file')
    parser.add_argument('mdl', help='model file (to get transitions)')
    parser.add_argument('ali', help='alignemnt')
    parser.add_argument('phn', help='phones.txt')
    parser.add_argument('utt', help='utterance')
    args = parser.parse_args()

    utt = args.utt

    # A private directory instead of a shared './tmp': it is removed at the
    # end, which must never delete files this script did not create.
    temp_dir = tempfile.mkdtemp()
    try:
        wav_file = path.join(temp_dir, '%s.wav' % (utt,))
        sys.stderr.write("Extracting wav utterance %s\n" % (utt,))
        # Argument lists avoid a shell, so odd characters in paths or
        # utterance ids cannot break the command line.
        check_call(['wav-copy', args.wav,
                    'scp,p:echo %s %s|' % (utt, wav_file)])

        with kaldi_io.RandomAccessPythonReader(
                "ark:wav-to-duration 'scp:echo %s %s |' ark,t:-|" %
                (utt, wav_file)) as dur_reader:
            dur = dur_reader[utt]

        with kaldi_io.RandomAccessInt32PairVectorReader(
                "ark:ali-to-phones --write-lengths '%s' '%s' 'ark:-' |" %
                (args.mdl, args.ali)) as ali_reader:
            ali = ali_reader[utt]

        with kaldi_io.SequentialPythonReader(
                'ark:%s' % (args.phn,)) as phn_reader:
            phones_dict = {n: p for p, n in phn_reader}

        label_file = path.join(temp_dir, '%s.txt' % (utt,))
        with open(label_file, 'w') as lf:
            for line in alignment_to_labels(ali, dur, phones_dict):
                lf.write(line + '\n')

        check_call(['audacity', wav_file])
    finally:
        # The original called the non-existent shutil.rmdir (and never
        # imported shutil), so the script crashed here and left files behind.
        shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == '__main__':
    main()