├── .dokx ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── doc ├── hdf5.md └── usage.md ├── hdf5-0-0.rockspec ├── luasrc ├── CMakeLists.txt ├── dataset.lua ├── datasetOptions.lua ├── ffi.lua ├── file.lua ├── group.lua ├── init.lua └── testUtils.lua └── tests ├── benchmark └── benchmark.lua ├── data ├── empty.h5 ├── empty.lua ├── generate.py ├── oneTensor.h5 ├── oneTensor.lua ├── twoTensors.h5 ├── twoTensors.lua ├── twoTensorsNested.h5 └── twoTensorsNested.lua ├── matlab └── testMatlab.m ├── python └── testPython.py ├── testChunking.lua ├── testData.lua ├── testDeflate.lua ├── testReference.lua ├── testSerialization.lua └── testStructure.lua /.dokx: -------------------------------------------------------------------------------- 1 | return { 2 | -- filter: pattern or table of patterns; file paths to include 3 | --filter = nil, 4 | 5 | -- exclude: pattern or table of patterns; file paths to exclude 6 | exclude = { 'README', 'tests', 'hdf5.md' }, 7 | 8 | -- tocLevelTopSection: integer; max depth of table of contents for standalone .md docs 9 | --tocLevelTopSection = nil, 10 | 11 | -- sectionOrder: table; paths of .md files in order of priority 12 | sectionOrder = { "init", "usage" }, 13 | 14 | -- packageName: string; override the inferred package namespace 15 | packageName = "hdf5", 16 | 17 | -- githubURL: string; $githubUser/$githubProject - used for generating links, if present 18 | githubURL = "d11/torch-hdf5", 19 | 20 | -- section: string; name of the section under which this package should be grouped in the main menu 21 | section = "Utilities", 22 | 23 | } 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | 3 | env: 4 | - LUA="th" 5 | 6 | branches: 7 | only: 8 | - master 9 | 10 | before_script: 11 | - sudo apt-get update -qq >/dev/null 12 | - sudo apt-get install -qq gfortran >/dev/null 13 | - sudo apt-get install -qq gcc-multilib gfortran-multilib >/dev/null 14 | - sudo apt-get install -qq liblapack-dev >/dev/null 15 | - sudo apt-get install libhdf5-serial-dev hdf5-tools >/dev/null 16 | - curl -s https://raw.githubusercontent.com/torch/ezinstall/master/install-all | bash 17 | - echo "==== Building torch-hdf5, using luarocks ====" 18 | - sudo /usr/local/bin/luarocks make 19 | 20 | script: 'for x in tests/test*.lua ; do echo "==== $x ====" ; th $x ; done' 21 | 22 | notifications: 23 | email: 24 | on_success: change 25 | on_failure: always 26 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 2 | CMAKE_POLICY(VERSION 2.6) 3 | FIND_PACKAGE(Torch REQUIRED) 4 | FIND_PACKAGE(HDF5 1.8 REQUIRED) 5 | 6 | ADD_SUBDIRECTORY("luasrc") 7 | 8 | INSTALL(CODE "MESSAGE(\"-- Generating \" ${CMAKE_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PATH_SUBDIR}/hdf5/config.lua) 9 | FILE(WRITE ${CMAKE_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PATH_SUBDIR}/hdf5/config.lua \"hdf5._config = { 10 | HDF5_INCLUDE_PATH = \\\"${HDF5_INCLUDE_DIR}\\\", 11 | HDF5_LIBRARIES = \\\"${HDF5_LIBRARIES}\\\" 12 | }\n\")") 13 | 
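The INSTALL(CODE ...) step above generates a small hdf5/config.lua in the Torch install tree; luasrc/ffi.lua later reads hdf5._config to locate the HDF5 headers and shared library at runtime (HDF5_LIBRARIES may be a ;-separated list, which ffi.lua splits). As a rough illustration only - the actual values depend on how CMake's FindHDF5 resolves your installation, and the paths below are made up for the example - the generated file looks something like:

    hdf5._config = {
        HDF5_INCLUDE_PATH = "/usr/include/hdf5/serial",          -- example path; yours will differ
        HDF5_LIBRARIES = "/usr/lib/x86_64-linux-gnu/libhdf5.so"  -- example path; yours will differ
    }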
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Want to contribute? Great! First, read this page (including the small print at the end). 2 | 3 | ### Before you contribute 4 | Before we can use your code, you must sign the 5 | [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) 6 | (CLA), which you can do online. The CLA is necessary mainly because you own the 7 | copyright to your changes, even after your contribution becomes part of our 8 | codebase, so we need your permission to use and distribute your code. We also 9 | need to be sure of various other things—for instance that you'll tell us if you 10 | know that your code infringes on other people's patents. You don't have to sign 11 | the CLA until after you've submitted your code for review and a member has 12 | approved it, but you must do it before we can put your code into our codebase. 13 | Before you start working on a larger contribution, you should get in touch with 14 | us first through the issue tracker with your idea so that we can help out and 15 | possibly guide you. Coordinating up front makes it much easier to avoid 16 | frustration later on. 17 | 18 | ### Code reviews 19 | All submissions, including submissions by project members, require review. We 20 | use Github pull requests for this purpose. 21 | 22 | ### The small print 23 | Contributions made by corporations are covered by a different agreement than 24 | the one above, the Software Grant and Corporate Contributor License Agreement. 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2014, Google Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | 30 | The following applies to HDF5 itself, which may be used with torch-hdf5: 31 | 32 | 33 | Copyright Notice and License Terms for 34 | HDF5 (Hierarchical Data Format 5) Software Library and Utilities 35 | ----------------------------------------------------------------------------- 36 | 37 | HDF5 (Hierarchical Data Format 5) Software Library and Utilities 38 | Copyright 2006-2013 by The HDF Group. 39 | 40 | NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities 41 | Copyright 1998-2006 by the Board of Trustees of the University of Illinois. 42 | 43 | All rights reserved. 44 | 45 | Redistribution and use in source and binary forms, with or without 46 | modification, are permitted for any purpose (including commercial purposes) 47 | provided that the following conditions are met: 48 | 49 | 1. Redistributions of source code must retain the above copyright notice, 50 | this list of conditions, and the following disclaimer. 51 | 52 | 2. Redistributions in binary form must reproduce the above copyright notice, 53 | this list of conditions, and the following disclaimer in the documentation 54 | and/or materials provided with the distribution. 55 | 56 | 3. In addition, redistributions of modified forms of the source or binary 57 | code must carry prominent notices stating that the original code was 58 | changed and the date of the change. 59 | 60 | 4. All publications or advertising materials mentioning features or use of 61 | this software are asked, but not required, to acknowledge that it was 62 | developed by The HDF Group and by the National Center for Supercomputing 63 | Applications at the University of Illinois at Urbana-Champaign and 64 | credit the contributors. 65 | 66 | 5. Neither the name of The HDF Group, the name of the University, nor the 67 | name of any Contributor may be used to endorse or promote products derived 68 | from this software without specific prior written permission from 69 | The HDF Group, the University, or the Contributor, respectively. 70 | 71 | DISCLAIMER: 72 | THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS 73 | "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. In no 74 | event shall The HDF Group or the Contributors be liable for any damages 75 | suffered by the users arising out of the use of this software, even if 76 | advised of the possibility of such damage. 77 | 78 | ----------------------------------------------------------------------------- 79 | ----------------------------------------------------------------------------- 80 | 81 | Contributors: National Center for Supercomputing Applications (NCSA) at 82 | the University of Illinois, Fortner Software, Unidata Program Center (netCDF), 83 | The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip), 84 | and Digital Equipment Corporation (DEC). 85 | 86 | ----------------------------------------------------------------------------- 87 | 88 | Portions of HDF5 were developed with support from the Lawrence Berkeley 89 | National Laboratory (LBNL) and the United States Department of Energy 90 | under Prime Contract No. DE-AC02-05CH11231. 91 | 92 | ----------------------------------------------------------------------------- 93 | 94 | Portions of HDF5 were developed with support from the University of 95 | California, Lawrence Livermore National Laboratory (UC LLNL). 
96 | The following statement applies to those portions of the product and must 97 | be retained in any redistribution of source code, binaries, documentation, 98 | and/or accompanying materials: 99 | 100 | This work was partially produced at the University of California, 101 | Lawrence Livermore National Laboratory (UC LLNL) under contract 102 | no. W-7405-ENG-48 (Contract 48) between the U.S. Department of Energy 103 | (DOE) and The Regents of the University of California (University) 104 | for the operation of UC LLNL. 105 | 106 | DISCLAIMER: 107 | This work was prepared as an account of work sponsored by an agency of 108 | the United States Government. Neither the United States Government nor 109 | the University of California nor any of their employees, makes any 110 | warranty, express or implied, or assumes any liability or responsibility 111 | for the accuracy, completeness, or usefulness of any information, 112 | apparatus, product, or process disclosed, or represents that its use 113 | would not infringe privately- owned rights. Reference herein to any 114 | specific commercial products, process, or service by trade name, 115 | trademark, manufacturer, or otherwise, does not necessarily constitute 116 | or imply its endorsement, recommendation, or favoring by the United 117 | States Government or the University of California. The views and 118 | opinions of authors expressed herein do not necessarily state or reflect 119 | those of the United States Government or the University of California, 120 | and shall not be used for advertising or product endorsement purposes. 121 | ----------------------------------------------------------------------------- 122 | 123 | 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # torch-hdf5 2 | 3 | This package allows you to read and write Torch data from and to [HDF5](http://en.wikipedia.org/wiki/Hierarchical_Data_Format) files. The format is fast, 4 | flexible, and supported by a wide range of other software - including **MATLAB**, 5 | **Python**, and **R**. 6 | 7 | [![Build Status](https://travis-ci.org/deepmind/torch-hdf5.png?branch=master)](https://travis-ci.org/deepmind/torch-hdf5) 8 | ## Usage 9 | 10 | For further information, please consult [the user manual](doc/usage.md). 11 | -------------------------------------------------------------------------------- /doc/hdf5.md: -------------------------------------------------------------------------------- 1 | # Notes on HDF5 2 | 3 | ## General benefits 4 | 5 | ### Portability 6 | 7 | There are libraries for Python, R, C, C++, Matlab, and other software. The 8 | behaviour is also reliable and consistent across hardware platforms, 9 | endian-ness, and so on. 10 | 11 | ### Thoroughly Tested 12 | 13 | HDF5 is used by [a wide range of large scientific institutions](http://www.hdfgroup.org/HDF5/users5.html). 14 | 15 | ### Tools 16 | 17 | There are already tools available for inspecting, comparing and editing HDF5 files. 18 | 19 | ### Flexible & Extensible 20 | 21 | We can store entire collections of data and metadata in one place, and in a semantically sensible way. 22 | 23 | ### Partial I/O 24 | 25 | We can read and write only the data that we need (see the short sketch at the end of this section). 26 | 27 | ### Concurrency 28 | 29 | Potential for concurrent I/O in future, via Parallel HDF5.
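To make the "Partial I/O" point above concrete, here is a minimal torch-hdf5 sketch (the file path and sizes are placeholders; the calls mirror those documented in doc/usage.md):

    require 'hdf5'

    -- write a 100x100 tensor...
    local myFile = hdf5.open('/tmp/example.h5', 'w')
    myFile:write('/data', torch.rand(100, 100))
    myFile:close()

    -- ...then read back only the first 10 rows and 10 columns
    myFile = hdf5.open('/tmp/example.h5', 'r')
    local block = myFile:read('/data'):partial({1, 10}, {1, 10})
    myFile:close()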
30 | 31 | ## Special Features 32 | 33 | Transferred from [h5py docs](http://www.h5py.org/docs/high/dataset.html#special-features). 34 | 35 | ### Chunked storage 36 | 37 | HDF5 can store data in “chunks” indexed by B-trees, as well as in the 38 | traditional contiguous manner. This can dramatically increase I/O performance 39 | for certain patterns of access; for example, reading every n-th element along 40 | the fastest-varying dimension. 41 | 42 | ### Compression 43 | 44 | Transparent lossless compression can substantially reduce the storage space 45 | needed for the dataset. Beginning with h5py 1.1, three techniques are 46 | available, “gzip”, “lzf” and “szip”. 47 | 48 | ### Scale/offset storage & lossy compression 49 | 50 | HDF5 1.8 introduces compression based on truncation to a fixed number of bits 51 | after scaling and shifting data. This can be used, for instance, to do the 52 | following: 53 | 54 | - Losslessly store 12-bit integer data using only 12 bits of storage per value. 55 | - Lossily store 16-bit integer data using 12 bits of storage per value. 56 | - Lossily store floating-point data with a fixed number of digits after the 57 | decimal place. 58 | 59 | ### Error-Detection 60 | 61 | All versions of HDF5 include the fletcher32 checksum filter, which enables 62 | read-time error detection for datasets. If part of a dataset becomes corrupted, 63 | a read operation on that section will immediately fail with an exception. 64 | 65 | ### Resizing 66 | 67 | Datasets can be resized, up to a maximum value provided at creation time. 68 | -------------------------------------------------------------------------------- /doc/usage.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | ## Installation 4 | 5 | **** Please note: the central luarocks server has another package called hdf5 (http://colberg.org/lua-hdf5/) - if you use 'luarocks install' you may get that one instead. **** 6 | 7 | **** Please note also: torch-hdf5 now requires version 1.8.14 or greater of hdf5! **** 8 | 9 | ### OS X 10 | 11 | brew tap homebrew/science 12 | brew install hdf5 13 | git clone https://github.com/deepmind/torch-hdf5 14 | cd torch-hdf5 15 | luarocks make hdf5-0-0.rockspec 16 | 17 | Note: if `luarocks make` fails with an unsatisfied dependency, the luarocks being used is likely not the one provided by torch. Try using `[torch install directory]/install/bin/luarocks` instead. 18 | 19 | ### Ubuntu < 13.04 20 | 21 | sudo apt-get install libhdf5-serial-dev hdf5-tools 22 | git clone https://github.com/deepmind/torch-hdf5 23 | cd torch-hdf5 24 | luarocks make hdf5-0-0.rockspec 25 | 26 | ### Ubuntu >= 13.04 27 | 28 | sudo apt-get install libhdf5-serial-dev hdf5-tools 29 | git clone https://github.com/deepmind/torch-hdf5 30 | cd torch-hdf5 31 | luarocks make hdf5-0-0.rockspec LIBHDF5_LIBDIR="/usr/lib/x86_64-linux-gnu/" 32 | 33 | ## Writing from torch 34 | 35 | require 'hdf5' 36 | local myFile = hdf5.open('/path/to/write.h5', 'w') 37 | myFile:write('/path/to/data', torch.rand(5, 5)) 38 | myFile:close() 39 | 40 | ## Reading from torch 41 | 42 | require 'hdf5' 43 | local myFile = hdf5.open('/path/to/read.h5', 'r') 44 | local data = myFile:read('/path/to/data'):all() 45 | myFile:close() 46 | 47 | ## Reading from Matlab 48 | 49 | h5read /path/to/file.h5 /location/of/data 50 | 51 | See the [Matlab documentation](http://www.mathworks.co.uk/help/matlab/hdf5-files.html) for further information. 
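The `h5read` line above uses MATLAB's command syntax; the equivalent function-call form (same placeholder paths) is:

    data = h5read('/path/to/file.h5', '/location/of/data');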
52 | 53 | ## Reading from Python 54 | 55 | You need to install a library: 56 | 57 | $ pip install h5py 58 | 59 | Then: 60 | 61 | import h5py 62 | myFile = h5py.File('/path/to/file.h5', 'r') 63 | 64 | # The '...' means retrieve the whole tensor 65 | data = myFile['location']['of']['data'][...] 66 | print(data) 67 | 68 | See also the [h5py manual](http://www.h5py.org/docs/). 69 | 70 | ## Reading from R 71 | 72 | You need to install a library: 73 | 74 | source("http://bioconductor.org/biocLite.R") 75 | biocLite("rhdf5") 76 | 77 | Then: 78 | 79 | library(rhdf5) 80 | mydata <- h5read("/path/to/file.h5", "/location/of/data") 81 | str(mydata) 82 | 83 | Alternative libraries for R include **'h5r'** and **'ncdf4'**. 84 | 85 | ## More advanced usage 86 | 87 | ### Compression, chunking, and other options 88 | 89 | You can optionally pass a `DataSetOptions` object to specify how you want data to be written: 90 | 91 | require 'hdf5' 92 | local myFile = hdf5.open('/path/to/write.h5', 'w') 93 | local options = hdf5.DataSetOptions() 94 | options:setChunked(32, 32) 95 | options:setDeflate() 96 | myFile:write('/path/to/data', torch.rand(500, 500), options) 97 | myFile:close() 98 | 99 | ### Partial reading 100 | 101 | You can read from a dataset without loading the whole thing at once: 102 | 103 | local myFile = hdf5.open('/path/to/read.h5','r') 104 | -- Specify the range for each dimension of the dataset. 105 | local data = myFile:read('/path/to/data'):partial({start1, end1}, {start2, end2}) 106 | myFile:close() 107 | 108 | Note that, for efficiency, hdf5 may still load (but not return) more than just the piece you ask for - depending on what options the file was written with. For example, if the dataset is chunked, it should just load the chunks that overlap with the part you ask for. 109 | 110 | ### Size of the data 111 | 112 | Getting the size of the dataset without loading the data: 113 | 114 | local myFile = hdf5.open('/path/to/read.h5','r') 115 | local dim = myFile:read('/path/to/data'):dataspaceSize() 116 | myFile:close() 117 | 118 | ### Tensor Type of the data 119 | 120 | Checking the type of torch.Tensor without loading the data: 121 | 122 | local myFile = hdf5.open('/path/to/read.h5','r') 123 | local factory = myFile:read('/path/to/data'):getTensorFactory() 124 | myFile:close() 125 | 126 | ### Reading HDF5 file from multiple threads 127 | 128 | If you want to use HDF5 from multiple threads, you will need a thread-safe build of the underlying HDF5 library. Otherwise, you will get random crashes. See the [HDF5 docs](https://support.hdfgroup.org/ftp/HDF5/current18/src/unpacked/release_docs/INSTALL) for how to build a thread-safe version. 129 | 130 | If you want to do this from torch you will also need to install torch [threads](https://github.com/torch/threads). Then you can 131 | 132 | local mainfile = hdf5.open('/path/to/read.h5','r') 133 | local nthreads = 2 134 | local data = nil 135 | local worker = function(h5file) 136 | torch.setnumthreads(1) 137 | print(__threadid) 138 | return h5file:read("data" .. __threadid):all() 139 | end 140 | local pool = threads.Threads(nthreads, function(threadid) require'torch' require'hdf5'end) 141 | pool:specific(true) 142 | 143 | for i=1,nthreads do 144 | pool:addjob(i, worker, function(_data) data = _data end, mainfile) 145 | end 146 | for i=1,nthreads do 147 | pool:dojob() 148 | print(data:size(1)==10) 149 | end 150 | mainfile:close() 151 | 152 | ## Command-line 153 | 154 | There are also a number of handy command-line tools. 
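For example, to get a quick recursive listing of a file written by the snippets above (the path is a placeholder):

    h5ls -r /path/to/write.h5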
155 | 156 | ### h5ls 157 | 158 | Lists specified features of HDF5 file contents. 159 | 160 | ### h5dump 161 | 162 | Examine the contents of an HDF5 file and dump those contents to an ASCII file. 163 | 164 | ### h5diff 165 | 166 | Compare two HDF5 files. 167 | 168 | ### h5copy 169 | 170 | Copies HDF5 objects from a file to a new file 171 | 172 | ### Other 173 | 174 | See [this page](http://www.hdfgroup.org/HDF5/doc/RM/Tools.html) for many more HDF5 tools. 175 | 176 | ## Elsewhere 177 | 178 | Libraries for many other languages and tools exist, too. See [this list](http://en.wikipedia.org/wiki/Hierarchical_Data_Format#Interfaces) for more information. 179 | -------------------------------------------------------------------------------- /hdf5-0-0.rockspec: -------------------------------------------------------------------------------- 1 | package = 'hdf5' 2 | version = '0-0' 3 | 4 | source = { 5 | url = 'git://github.com/d11/torch-hdf5.git', 6 | branch = 'master' 7 | } 8 | 9 | description = { 10 | summary = "Interface to HDF5 library", 11 | homepage = "http://d11.github.io/torch-hdf5", 12 | detailed = "Read and write Torch tensor data to and from Hierarchical Data Format files.", 13 | license = "BSD", 14 | maintainer = "Dan Horgan " 15 | } 16 | 17 | dependencies = { 'torch >= 7.0', 'penlight', 'totem' } 18 | build = { 19 | type = "command", 20 | build_command = [[ 21 | cmake -E make_directory build; 22 | cd build; 23 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 24 | $(MAKE) 25 | ]], 26 | install_command = "cd build && $(MAKE) install" 27 | } 28 | -------------------------------------------------------------------------------- /luasrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # install the lua code for the hdf5 package 2 | FILE(GLOB luasrc "*.lua") 3 | ADD_TORCH_PACKAGE(hdf5 "" "${luasrc}") 4 | -------------------------------------------------------------------------------- /luasrc/dataset.lua: -------------------------------------------------------------------------------- 1 | local torch = require "torch" 2 | 3 | -- Lua 5.2 compatibility 4 | local unpack = unpack or table.unpack 5 | 6 | local HDF5DataSet = torch.class("hdf5.HDF5DataSet") 7 | 8 | --[[ Get the sizes and max sizes of an HDF5 dataspace, returning them in Lua tables ]] 9 | local function getDataspaceSize(nDims, spaceID) 10 | local size_t = hdf5.ffi.typeof("hsize_t[" .. nDims .. "]") 11 | local dims = size_t() 12 | local maxDims = size_t() 13 | if hdf5.C.H5Sget_simple_extent_dims(spaceID, dims, maxDims) ~= nDims then 14 | error("Failed getting dataspace size") 15 | end 16 | local size = {} 17 | local maxSize = {} 18 | for k = 1, nDims do 19 | size[k] = tonumber(dims[k-1]) 20 | maxSize[k] = tonumber(maxDims[k-1]) 21 | end 22 | return size, maxSize 23 | end 24 | 25 | local function longStorageToHSize(storage, n) 26 | local out = hdf5.ffi.new("hsize_t[" .. n .. 
"]") 27 | for k = 1, n do 28 | out[k-1] = storage[k] 29 | end 30 | return out 31 | end 32 | 33 | --[[ Create an HDF5 dataspace corresponding to a given tensor ]] 34 | local function createTensorDataspace(tensor) 35 | local n = tensor:nDimension() 36 | local dataspaceID = hdf5.C.H5Screate_simple( 37 | n, 38 | longStorageToHSize(tensor:size(), n), 39 | longStorageToHSize(tensor:size(), n) 40 | ) 41 | return dataspaceID 42 | end 43 | 44 | function HDF5DataSet:__init(parent, datasetID, dataspaceID) 45 | assert(parent) 46 | assert(datasetID) 47 | self._parent = parent 48 | self._datasetID = datasetID 49 | self._dataspaceID = dataspaceID or hdf5.C.H5Dget_space(self._datasetID) 50 | hdf5._logger.debug("Initialising " .. tostring(self)) 51 | end 52 | 53 | function HDF5DataSet:_refresh_dataspace() 54 | local status = hdf5.C.H5Sclose(self._dataspaceID) 55 | assert(status >= 0, "error refreshing dataspace") 56 | self._dataspaceID = hdf5.C.H5Dget_space(self._datasetID) 57 | return self._dataspaceID 58 | end 59 | 60 | function HDF5DataSet:__tostring() 61 | return "[HDF5DataSet " .. hdf5._describeObject(self._datasetID) .. "]" 62 | end 63 | 64 | function HDF5DataSet:all() 65 | 66 | -- Create a new tensor of the correct type and size 67 | local nDims = hdf5.C.H5Sget_simple_extent_ndims(self._dataspaceID) 68 | local size = getDataspaceSize(nDims, self._dataspaceID) 69 | local factory, nativeType = self:getTensorFactory() 70 | 71 | local tensor = factory():resize(unpack(size)) 72 | 73 | -- Read data into the tensor 74 | local dataPtr = tensor:data() 75 | local status = hdf5.C.H5Dread(self._datasetID, nativeType, hdf5.H5S_ALL, hdf5.H5S_ALL, hdf5.H5P_DEFAULT, dataPtr) 76 | 77 | if status < 0 then 78 | error("HDF5DataSet:all() - failed reading data from " .. tostring(self)) 79 | end 80 | hdf5.C.H5Tclose(nativeType) 81 | 82 | return tensor 83 | end 84 | 85 | function HDF5DataSet:getTensorFactory() 86 | local typeID = hdf5.C.H5Dget_type(self._datasetID) 87 | local nativeType = hdf5.C.H5Tget_native_type(typeID, hdf5.C.H5T_DIR_ASCEND) 88 | local torchType = hdf5._getTorchType(typeID) 89 | hdf5.C.H5Tclose(typeID) 90 | if not torchType then 91 | error("Could not find torch type for native type " .. tostring(nativeType)) 92 | end 93 | if not nativeType then 94 | error("Cannot find hdf5 native type for " .. torchType) 95 | end 96 | if not hdf5.C.H5Sis_simple(self._dataspaceID) then 97 | error("Error: complex dataspaces are not supported!") 98 | end 99 | local factory = torch.factory(torchType) 100 | if not factory then 101 | error("No torch factory for type " .. torchType) 102 | end 103 | return factory, nativeType 104 | end 105 | 106 | local function rangesToOffsetAndCount(ranges) 107 | local offset = hdf5.ffi.new("hsize_t[" .. #ranges+1 .. "]") 108 | local count = hdf5.ffi.new("hsize_t[" .. #ranges+1 .. "]") 109 | 110 | for k, range in ipairs(ranges) do 111 | if type(range) ~= 'table' then 112 | range = { range, range } 113 | end 114 | offset[k-1] = range[1] - 1 115 | count[k-1] = range[2] - range[1] + 1 116 | end 117 | return offset, count 118 | end 119 | 120 | local function hsizeToLongStorage(hsize, n) 121 | local out = torch.LongStorage(n) 122 | for k = 1, n do 123 | out[k] = tonumber(hsize[k-1]) 124 | end 125 | return out 126 | end 127 | 128 | function HDF5DataSet:partial(...) 129 | local ranges = { ... } 130 | local nDims = hdf5.C.H5Sget_simple_extent_ndims(self._dataspaceID) 131 | if #ranges ~= nDims then 132 | error("HDF5DataSet:partial() - dimension mismatch. Expected " .. nDims .. " but " .. #ranges .. 
" were given.") 133 | end 134 | -- TODO dedup 135 | local null = hdf5.ffi.new("hsize_t *") 136 | local offset, count = rangesToOffsetAndCount(ranges) 137 | -- Create a new tensor of the correct type and size 138 | local factory, nativeType = self:getTensorFactory() 139 | local tensor = factory():resize(hsizeToLongStorage(count, #ranges)) 140 | 141 | local stride = null 142 | 143 | -- TODO clone space first? 144 | local status = hdf5.C.H5Sselect_hyperslab(self._dataspaceID, hdf5.C.H5S_SELECT_SET, offset, stride, count, null) 145 | if status < 0 then 146 | error("Cannot select hyperslab " .. tostring(...) .. " from " .. tostring(self)) 147 | end 148 | 149 | hdf5._logger.debug("HDF5DataSet:partial() - selected " 150 | .. tostring(hdf5.C.H5Sget_select_npoints(self._dataspaceID)) .. " points" 151 | ) 152 | 153 | local tensorDataspace = createTensorDataspace(tensor) 154 | -- Read data into the tensor 155 | local dataPtr = tensor:data() 156 | status = hdf5.C.H5Dread(self._datasetID, nativeType, tensorDataspace, self._dataspaceID, hdf5.H5P_DEFAULT, dataPtr) 157 | -- delete tensor dataspace 158 | local dataspace_status = hdf5.C.H5Sclose(tensorDataspace) 159 | 160 | assert(status >=0, "HDF5DataSet:partial() - failed reading data from " .. tostring(self)) 161 | assert(dataspace_status >= 0, "HDF5DataSet:partial() - error closing tensor dataspace for " .. tostring(self)) 162 | hdf5.C.H5Tclose(nativeType) 163 | return tensor 164 | end 165 | 166 | function HDF5DataSet:close() 167 | hdf5._logger.debug("Closing " .. tostring(self)) 168 | local status = hdf5.C.H5Dclose(self._datasetID) 169 | if status < 0 then 170 | error("Failed closing dataset for " .. tostring(self)) 171 | end 172 | status = hdf5.C.H5Sclose(self._dataspaceID) 173 | if status < 0 then 174 | error("Failed closing dataspace for " .. tostring(self)) 175 | end 176 | end 177 | 178 | function HDF5DataSet:dataspaceSize() 179 | local nDims = hdf5.C.H5Sget_simple_extent_ndims(self._dataspaceID) 180 | local size = getDataspaceSize(nDims, self._dataspaceID) 181 | return size 182 | end 183 | -------------------------------------------------------------------------------- /luasrc/datasetOptions.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Object for specifying HDF5 options to use with a dataset. 4 | 5 | ]] 6 | local torch = require 'torch' 7 | local stringx = require 'pl.stringx' 8 | 9 | -- Lua 5.2 compatibility 10 | local unpack = unpack or table.unpack 11 | 12 | local DataSetOptions, parent = torch.class("hdf5.DataSetOptions") 13 | 14 | --[[ Constructor. No parameters. 15 | 16 | Example: 17 | 18 | options = hdf5.DataSetOptions() 19 | options:setChunking(32, 32, 32) 20 | options:setDeflate() 21 | 22 | Returns: new DataSetOptions object 23 | ]] 24 | function DataSetOptions:__init() 25 | if hdf5.version[1] >= 1 and hdf5.version[2] >= 8 and hdf5.version[3] >= 14 then 26 | self._creationProperties = hdf5.C.H5Pcreate(hdf5.C.H5P_CLS_DATASET_CREATE_ID_g) 27 | else 28 | self._creationProperties = hdf5.C.H5Pcreate(hdf5.C.H5P_CLS_DATASET_CREATE_g) 29 | end 30 | self._chunking = nil 31 | end 32 | 33 | --[[ Modify the options, if necessary, to make them compatible with the given data ]] 34 | function DataSetOptions:adjustForData(tensor) 35 | if self._chunking then 36 | if #self._chunking ~= tensor:nDimension() then 37 | error("Chunk size must have same number of dimensions as data! Chunk size has " 38 | .. tostring(#self._chunking) .. " dimensions; data has " .. 
tensor:nDimension()) 39 | end 40 | 41 | -- If the data is smaller than the specified chunk size, make the chunk 42 | -- smaller in that dimension 43 | for k, size in ipairs(self._chunking) do 44 | local tensorSize = tensor:size(k) 45 | if self._chunking[k] > tensorSize then 46 | self._chunking[k] = tensorSize 47 | end 48 | end 49 | self:setChunked(unpack(self._chunking)) 50 | end 51 | end 52 | 53 | --[[ Use chunked mode for writing data. Must be enabled to use compression and 54 | other filters, or for efficient partial I/O. By default, chunking is disabled. 55 | 56 | You must specify the chunk size to use. This can have a significant effect on 57 | performance. In particular, using too small a chunk size relative to the data 58 | will slow things down a lot. 59 | 60 | Parameters: 61 | * `size1` - size in first dimension 62 | * `size2` - size in second dimension, if appropriate 63 | * `size3` - size in third dimension, if appropriate 64 | * `...` - more sizes, as needed 65 | 66 | You should provide as many sizes as there are dimensions in your data. 67 | 68 | Returns: 69 | `self` - the modified DataSetOptions object is returned, thus allowing for chaining of method calls 70 | 71 | ]] 72 | function DataSetOptions:setChunked(...) 73 | local chunking = { ... } 74 | local chunkDims = hdf5.ffi.new("hsize_t[" .. #chunking .. "]") 75 | for k, size in ipairs(chunking) do 76 | chunkDims[k-1] = size 77 | end 78 | hdf5.C.H5Pset_chunk(self._creationProperties, #chunking, chunkDims) 79 | self._chunking = chunking 80 | return self 81 | end 82 | 83 | --[[ Use the DEFLATE algorithm (zlib) to compress chunks of data 84 | 85 | Parameters: 86 | * `level` - level of compression to apply (0-9) [default 6] 87 | 88 | Returns: 89 | `self` - the modified DataSetOptions object is returned, thus allowing for chaining of method calls 90 | 91 | ]] 92 | function DataSetOptions:setDeflate(level) 93 | level = level or 6 94 | if not hdf5._deflateAvailable() then 95 | error("DataSetOptions:setDeflate() - DEFLATE is not available with your build of HDF5") 96 | end 97 | hdf5.C.H5Pset_deflate(self._creationProperties, level) 98 | return self 99 | end 100 | 101 | function DataSetOptions:creationProperties() 102 | return self._creationProperties 103 | end 104 | 105 | --[[ Close the DataSetOptions object. This should be done after use to free resources. ]] 106 | function DataSetOptions:close() 107 | hdf5.C.H5Pclose(self._creationProperties) 108 | end 109 | 110 | function DataSetOptions:__tostring() 111 | local description = "[DataSetOptions:" 112 | description = description .. " chunking=" .. (self._chunking and stringx.join("x", self._chunking) or "none") 113 | description = description ..
"]" 114 | return description 115 | end 116 | -------------------------------------------------------------------------------- /luasrc/ffi.lua: -------------------------------------------------------------------------------- 1 | local torch = require 'torch' 2 | local ffi = require 'ffi' 3 | local bit = require 'bit' 4 | local stringx = require 'pl.stringx' 5 | local path = require 'pl.path' 6 | 7 | local function loadHDF5Library(libraryPaths) 8 | local libraries = stringx.split(libraryPaths, ";") 9 | local hdf5LibPath 10 | for _, libPath in ipairs(libraries) do 11 | local basename = path.basename(libPath) 12 | local name, ext = path.splitext(basename) 13 | if name == 'libhdf5' then 14 | hdf5LibPath = libPath 15 | break 16 | end 17 | end 18 | 19 | if not hdf5LibPath then 20 | error("Error: unable to find a valid HDF5 lib path in the config") 21 | end 22 | 23 | -- If the path from the config isn't valid, fall back to the default search mechanism 24 | if not path.isfile(hdf5LibPath) then 25 | hdf5._logger.warn("Unable to find the HDF5 lib we were built against - trying to find it elsewhere") 26 | hdf5LibPath = "hdf5" 27 | end 28 | 29 | local hdf5lib = ffi.load(hdf5LibPath) 30 | if not hdf5lib then 31 | error("torch-hdf5: unable to load libhdf5!") 32 | end 33 | return hdf5lib 34 | end 35 | 36 | local function loadHDF5Header(includePath) 37 | 38 | -- Pass the header file through the C preprocessor once 39 | local headerPath = nil 40 | -- Some hdf5 installations (e.g. brew's 1.10) are going to have several include dirs, search them all 41 | local includes = {} 42 | for dir in string.gmatch(includePath, '[^;]+') do 43 | local headerCandidate = path.join(dir, "hdf5.h") 44 | if path.isfile(headerCandidate) then 45 | headerPath = headerCandidate 46 | end 47 | table.insert(includes, dir) 48 | end 49 | hdf5._logger.debug("Processing header " .. headerPath) 50 | if headerPath == nil or not path.isfile(headerPath) then 51 | error("Error: unable to locate HDF5 header file at " .. headerPath) 52 | end 53 | local include_opts = "" 54 | for _,dir in ipairs(includes) do 55 | include_opts = include_opts .. " -I" .. dir 56 | end 57 | 58 | local process = io.popen("gcc -D '_Nullable=' -E " .. headerPath .. include_opts) 59 | local contents = process:read("*all") 60 | local success, errorMsg, returnCode = process:close() 61 | if returnCode ~= 0 then 62 | error("Error: unable to parse HDF5 header file at " .. headerPath) 63 | end 64 | 65 | -- Strip out the extra junk that GCC returns 66 | local cdef = "" 67 | for _, line in ipairs(stringx.splitlines(contents)) do 68 | if not stringx.startswith(line, '#') then 69 | cdef = cdef .. line .. "\n" 70 | end 71 | end 72 | 73 | ffi.cdef(cdef) 74 | end 75 | 76 | hdf5.C = loadHDF5Library(hdf5._config.HDF5_LIBRARIES) 77 | loadHDF5Header(hdf5._config.HDF5_INCLUDE_PATH) 78 | 79 | -- Initialize HDF5 80 | hdf5.C.H5open() 81 | 82 | local function checkHDF5Version() 83 | local maj = ffi.new('unsigned int[1]') 84 | local min = ffi.new('unsigned int[1]') 85 | local rel = ffi.new('unsigned int[1]') 86 | hdf5.C.H5get_libversion(maj, min, rel) 87 | if maj[0] ~= 1 or min[0] ~= 8 then 88 | error("Unsupported HDF5 version: " .. maj[0] .. "." .. min[0] .. "." .. 
rel[0]) 89 | end 90 | hdf5.version = {tonumber(maj[0]), tonumber(min[0]), tonumber(rel[0])} 91 | -- This is disabled as it's a bit too specific 92 | -- hdf5.C.H5check_version(1, 8, 12) 93 | end 94 | hdf5.ffi = ffi 95 | checkHDF5Version() 96 | 97 | --[[ 98 | 99 | Adding definitions for global constants 100 | 101 | ]] 102 | 103 | -- H5Tpublic.h 104 | local function addConstants(tableName, constantNames, func) 105 | if not func then 106 | func = function(x) return x end 107 | end 108 | if not hdf5[tableName] then 109 | hdf5[tableName] = { } 110 | end 111 | for _, name in ipairs(constantNames) do 112 | hdf5[tableName][name] = hdf5.C[func(name)] 113 | end 114 | end 115 | 116 | local function addH5t(x) return "H5T_" .. x end 117 | addConstants('h5t', { 118 | 'NO_CLASS', 119 | 'INTEGER', 120 | 'FLOAT', 121 | 'TIME', 122 | 'STRING', 123 | 'BITFIELD', 124 | 'OPAQUE', 125 | 'COMPOUND', 126 | 'REFERENCE', 127 | 'ENUM', 128 | 'VLEN', 129 | 'ARRAY', 130 | 'NCLASSES', 131 | }, addH5t) 132 | local function addG(x) return addH5t(x) .. "_g" end 133 | 134 | addConstants('h5t', { 135 | 'IEEE_F32BE', 136 | 'IEEE_F32LE', 137 | 'IEEE_F64BE', 138 | 'IEEE_F64LE', 139 | }, addG) 140 | 141 | addConstants('h5t', { 142 | 'STD_I8BE', 143 | 'STD_I8LE', 144 | 'STD_I16BE', 145 | 'STD_I16LE', 146 | 'STD_I32BE', 147 | 'STD_I32LE', 148 | 'STD_I64BE', 149 | 'STD_I64LE', 150 | 'STD_U8BE', 151 | 'STD_U8LE', 152 | 'STD_U16BE', 153 | 'STD_U16LE', 154 | 'STD_U32BE', 155 | 'STD_U32LE', 156 | 'STD_U64BE', 157 | 'STD_U64LE', 158 | 'STD_B8BE', 159 | 'STD_B8LE', 160 | 'STD_B16BE', 161 | 'STD_B16LE', 162 | 'STD_B32BE', 163 | 'STD_B32LE', 164 | 'STD_B64BE', 165 | 'STD_B64LE', 166 | 'STD_REF_OBJ', 167 | 'STD_REF_DSETREG', 168 | }, addG) 169 | 170 | addConstants('h5t', { 171 | 'NATIVE_SCHAR', 172 | 'NATIVE_UCHAR', 173 | 'NATIVE_SHORT', 174 | 'NATIVE_USHORT', 175 | 'NATIVE_INT', 176 | 'NATIVE_UINT', 177 | 'NATIVE_LONG', 178 | 'NATIVE_ULONG', 179 | 'NATIVE_LLONG', 180 | 'NATIVE_ULLONG', 181 | 'NATIVE_FLOAT', 182 | 'NATIVE_DOUBLE', 183 | 'NATIVE_LDOUBLE', 184 | 'NATIVE_B8', 185 | 'NATIVE_B16', 186 | 'NATIVE_B32', 187 | 'NATIVE_B64', 188 | 'NATIVE_OPAQUE', 189 | 'NATIVE_HADDR', 190 | 'NATIVE_HSIZE', 191 | 'NATIVE_HSSIZE', 192 | 'NATIVE_HERR', 193 | 'NATIVE_HBOOL', 194 | 'NATIVE_INT8', 195 | 'NATIVE_UINT8', 196 | 'NATIVE_INT_LEAST8', 197 | 'NATIVE_UINT_LEAST8', 198 | 'NATIVE_INT_FAST8', 199 | 'NATIVE_UINT_FAST8', 200 | 'NATIVE_INT16', 201 | 'NATIVE_UINT16', 202 | 'NATIVE_INT_LEAST16', 203 | 'NATIVE_UINT_LEAST16', 204 | 'NATIVE_INT_FAST16', 205 | 'NATIVE_UINT_FAST16', 206 | 'NATIVE_INT32', 207 | 'NATIVE_UINT32', 208 | 'NATIVE_INT_LEAST32', 209 | 'NATIVE_UINT_LEAST32', 210 | 'NATIVE_INT_FAST32', 211 | 'NATIVE_UINT_FAST32', 212 | 'NATIVE_INT64', 213 | 'NATIVE_UINT64', 214 | 'NATIVE_INT_LEAST64', 215 | 'NATIVE_UINT_LEAST64', 216 | 'NATIVE_INT_FAST64', 217 | 'NATIVE_UINT_FAST64', 218 | }, addG) 219 | 220 | hdf5.H5F_ACC_RDONLY = 0x0000 -- absence of rdwr => rd-only 221 | hdf5.H5F_ACC_RDWR = 0x0001 -- open for read and write 222 | hdf5.H5F_ACC_TRUNC = 0x0002 -- overwrite existing files 223 | hdf5.H5F_ACC_EXCL = 0x0004 -- fail if file already exists 224 | hdf5.H5F_ACC_DEBUG = 0x0008 -- print debug info 225 | hdf5.H5F_ACC_CREAT = 0x0010 -- create non-existing files 226 | 227 | hdf5.H5F_OBJ_FILE = 0x0001 -- File objects 228 | hdf5.H5F_OBJ_DATASET = 0x0002 -- Dataset objects 229 | hdf5.H5F_OBJ_GROUP = 0x0004 -- Group objects 230 | hdf5.H5F_OBJ_DATATYPE = 0x0008 -- Named datatype objects 231 | hdf5.H5F_OBJ_ATTR = 0x0010 -- Attribute objects 232 | 
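-- The H5F_OBJ_* flags above are the object-type masks accepted by H5Fget_obj_count/H5Fget_obj_ids
-- (used in HDF5File:_printOpenObjects); H5F_OBJ_ALL below is simply their bitwise OR.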
hdf5.H5F_OBJ_ALL = bit.bor( 233 | hdf5.H5F_OBJ_FILE, 234 | hdf5.H5F_OBJ_DATASET, 235 | hdf5.H5F_OBJ_GROUP, 236 | hdf5.H5F_OBJ_DATATYPE, 237 | hdf5.H5F_OBJ_ATTR 238 | ) 239 | hdf5.H5F_OBJ_LOCAL = 0x0020 -- Restrict search to objects opened through current file ID 240 | -- (as opposed to objects opened through any file ID accessing this file) 241 | 242 | hdf5.H5P_DEFAULT = 0 243 | hdf5.H5S_ALL = 0 244 | hdf5.H5F_UNLIMITED = ffi.new('hsize_t', ffi.cast('hssize_t',-1)) 245 | hdf5.H5S_SELECT_SET = 0 246 | 247 | -- This table specifies which exact format a given type of Tensor should be saved as. 248 | local fileTypeMap = { 249 | ["torch.ByteTensor"] = hdf5.h5t.STD_U8LE, 250 | ["torch.CharTensor"] = hdf5.h5t.STD_I8LE, 251 | ["torch.ShortTensor"] = hdf5.h5t.STD_I16LE, 252 | ["torch.IntTensor"] = hdf5.h5t.STD_I32LE, 253 | ["torch.LongTensor"] = hdf5.h5t.STD_I64LE, 254 | ["torch.FloatTensor"] = hdf5.h5t.IEEE_F32LE, 255 | ["torch.DoubleTensor"] = hdf5.h5t.IEEE_F64LE 256 | } 257 | 258 | function hdf5._outputTypeForTensorType(tensorType) 259 | return fileTypeMap[tensorType] 260 | end 261 | 262 | -- This table tells HDF5 what format to read a given Tensor's data into memory as. 263 | local nativeTypeMap = { 264 | ["torch.ByteTensor"] = hdf5.h5t.NATIVE_UCHAR, 265 | ["torch.CharTensor"] = hdf5.h5t.NATIVE_SCHAR, 266 | ["torch.ShortTensor"] = hdf5.h5t.NATIVE_SHORT, 267 | ["torch.IntTensor"] = hdf5.h5t.NATIVE_INT, 268 | ["torch.LongTensor"] = hdf5.h5t.NATIVE_LONG, 269 | ["torch.FloatTensor"] = hdf5.h5t.NATIVE_FLOAT, 270 | ["torch.DoubleTensor"] = hdf5.h5t.NATIVE_DOUBLE, 271 | } 272 | 273 | function hdf5._nativeTypeForTensorType(tensorType) 274 | local nativeType = nativeTypeMap[tensorType] 275 | if nativeType == nil then 276 | error("Cannot find hdf5 native type for " .. 
tensorType) 277 | end 278 | return nativeType 279 | end 280 | 281 | -- This table lets us stringify HDF5 datatype classes 282 | local classMap = {} 283 | classMap[tonumber(hdf5.h5t.NO_CLASS)] = 'NO_CLASS' 284 | classMap[tonumber(hdf5.h5t.INTEGER)] = 'INTEGER' 285 | classMap[tonumber(hdf5.h5t.FLOAT)] = 'FLOAT' 286 | classMap[tonumber(hdf5.h5t.TIME)] = 'TIME' 287 | classMap[tonumber(hdf5.h5t.STRING)] = 'STRING' 288 | classMap[tonumber(hdf5.h5t.BITFIELD)] = 'BITFIELD' 289 | classMap[tonumber(hdf5.h5t.OPAQUE)] = 'OPAQUE' 290 | classMap[tonumber(hdf5.h5t.COMPOUND)] = 'COMPOUND' 291 | classMap[tonumber(hdf5.h5t.REFERENCE)] = 'REFERENCE' 292 | classMap[tonumber(hdf5.h5t.ENUM)] = 'ENUM' 293 | classMap[tonumber(hdf5.h5t.VLEN)] = 'VLEN' 294 | classMap[tonumber(hdf5.h5t.ARRAY)] = 'ARRAY' 295 | classMap[tonumber(hdf5.h5t.NCLASSES)] = 'NCLASSES' 296 | 297 | local typeMap = {} 298 | 299 | typeMap[tonumber(hdf5.C.H5I_UNINIT)] = 'UNINIT' -- uninitialized type 300 | typeMap[tonumber(hdf5.C.H5I_BADID)] = 'BADID' -- invalid Type 301 | typeMap[tonumber(hdf5.C.H5I_FILE)] = 'FILE' -- type ID for File objects 302 | typeMap[tonumber(hdf5.C.H5I_GROUP)] = 'GROUP' -- type ID for Group objects 303 | typeMap[tonumber(hdf5.C.H5I_DATATYPE)] = 'DATATYPE' -- type ID for Datatype objects 304 | typeMap[tonumber(hdf5.C.H5I_DATASPACE)] = 'DATASPACE' -- type ID for Dataspace objects 305 | typeMap[tonumber(hdf5.C.H5I_DATASET)] = 'DATASET' -- type ID for Dataset objects 306 | typeMap[tonumber(hdf5.C.H5I_ATTR)] = 'ATTR' -- type ID for Attribute objects 307 | typeMap[tonumber(hdf5.C.H5I_REFERENCE)] = 'REFERENCE ' -- type ID for Reference objects 308 | typeMap[tonumber(hdf5.C.H5I_VFL)] = 'VFL' -- type ID for virtual file layer 309 | typeMap[tonumber(hdf5.C.H5I_GENPROP_CLS)] = 'GENPROP_CLS' -- type ID for generic property list classes 310 | typeMap[tonumber(hdf5.C.H5I_GENPROP_LST)] = 'GENPROP_LST' -- type ID for generic property lists 311 | typeMap[tonumber(hdf5.C.H5I_ERROR_CLASS)] = 'ERROR_CLASS' -- type ID for error classes 312 | typeMap[tonumber(hdf5.C.H5I_ERROR_MSG)] = 'ERROR_MSG' -- type ID for error messages 313 | typeMap[tonumber(hdf5.C.H5I_ERROR_STACK)] = 'ERROR_STACK' -- type ID for error stacks 314 | typeMap[tonumber(hdf5.C.H5I_NTYPES)] = 'NTYPES' -- number of library types, MUST BE LAST! 315 | 316 | function hdf5._datatypeName(typeID) 317 | local classID = tonumber(hdf5.C.H5Tget_class(typeID)) 318 | local className = classMap[classID] 319 | if not className then 320 | error("Unknown class for type " .. tostring(typeID)) 321 | end 322 | return className 323 | end 324 | 325 | function hdf5._getTorchType(typeID) 326 | local className = hdf5._datatypeName(typeID) 327 | local size = tonumber(hdf5.C.H5Tget_size(typeID)) 328 | if className == 'INTEGER' then 329 | if size == 1 then 330 | return 'torch.ByteTensor' 331 | end 332 | if size == 2 then 333 | return 'torch.ShortTensor' 334 | end 335 | if size == 4 then 336 | return 'torch.IntTensor' 337 | end 338 | if size == 8 then 339 | return 'torch.LongTensor' 340 | end 341 | error("Cannot support reading integer data with size = " .. size .. " bytes") 342 | elseif className == 'FLOAT' then 343 | if size == 4 then 344 | return 'torch.FloatTensor' 345 | end 346 | if size == 8 then 347 | return 'torch.DoubleTensor' 348 | end 349 | error("Cannot support reading float data with size = " .. size .. " bytes") 350 | 351 | else 352 | error("Reading data of class " .. tostring(className) .. "(" .. typeID .. 
") is unsupported") 353 | end 354 | end 355 | 356 | 357 | function hdf5._getObjectName(objectID) 358 | local name = ffi.new('char[255]') 359 | hdf5.C.H5Iget_name(objectID, name, 255) 360 | return ffi.string(name) 361 | end 362 | 363 | function hdf5._getObjectType(objectID) 364 | local typeID = hdf5.C.H5Iget_type(objectID) 365 | if typeID == hdf5.C.H5I_BADID then 366 | error("Error getting type for object " .. objectID) 367 | end 368 | if typeID == hdf5.C.H5I_DATATYPE then 369 | return "DATATYPE (" .. hdf5._datatypeName(typeID) .. ")" 370 | end 371 | local typeName = typeMap[tonumber(typeID)] 372 | if not typeName then 373 | error("Could not get name for type " .. tostring(typeID)) 374 | end 375 | return typeName 376 | end 377 | 378 | function hdf5._describeObject(objectID) 379 | return "(" .. tostring(objectID) .. " " 380 | .. hdf5._getObjectName(objectID) .. " " 381 | .. hdf5._getObjectType(objectID) .. ")" 382 | end 383 | 384 | hdf5.H5Z_FILTER_ERROR = -1 -- no filter 385 | hdf5.H5Z_FILTER_NONE = 0 -- reserved indefinitely 386 | hdf5.H5Z_FILTER_DEFLATE = 1 -- deflation like gzip 387 | hdf5.H5Z_FILTER_SHUFFLE = 2 -- shuffle the data 388 | hdf5.H5Z_FILTER_FLETCHER32 = 3 -- fletcher32 checksum of EDC 389 | hdf5.H5Z_FILTER_SZIP = 4 -- szip compression 390 | hdf5.H5Z_FILTER_NBIT = 5 -- nbit compression 391 | hdf5.H5Z_FILTER_SCALEOFFSET = 6 -- scale+offset compression 392 | hdf5.H5Z_FILTER_RESERVED = 256 -- filter ids below this value are reserved for library use 393 | hdf5.H5Z_FILTER_MAX = 65535 -- maximum filter id 394 | hdf5.H5Z_FILTER_CONFIG_ENCODE_ENABLED = 0x0001 395 | hdf5.H5Z_FILTER_CONFIG_DECODE_ENABLED = 0x0002 396 | 397 | function hdf5._fletcher32Available() 398 | local avail = hdf5.C.H5Zfilter_avail(hdf5.H5Z_FILTER_FLETCHER32) 399 | if tonumber(avail) ~= 1 then 400 | hdf5._logger.warn("Fletcher32 filter not available.") 401 | return false 402 | end 403 | local filterInfo = ffi.new('unsigned int[1]') 404 | local status = hdf5.C.H5Zget_filter_info (hdf5.H5Z_FILTER_FLETCHER32, filterInfo) 405 | if bit.band(filterInfo[0], hdf5.H5Z_FILTER_CONFIG_ENCODE_ENABLED) == 0 or 406 | bit.band(filterInfo[0], hdf5.H5Z_FILTER_CONFIG_DECODE_ENABLED) == 0 then 407 | hdf5._logger.warn("Fletcher32 filter not available for encoding and decoding.\n") 408 | return false 409 | end 410 | return true 411 | end 412 | 413 | function hdf5._deflateAvailable() 414 | local avail = hdf5.C.H5Zfilter_avail(hdf5.H5Z_FILTER_DEFLATE) 415 | if tonumber(avail) ~= 1 then 416 | hdf5._logger.warn("Deflate filter not available.") 417 | return false 418 | end 419 | return true 420 | end 421 | -------------------------------------------------------------------------------- /luasrc/file.lua: -------------------------------------------------------------------------------- 1 | local torch = require 'torch' 2 | local path = require 'pl.path' 3 | local stringx = require 'pl.stringx' 4 | local bit = require 'bit' 5 | 6 | local HDF5File = torch.class("hdf5.HDF5File") 7 | 8 | function HDF5File:__init(filename, fileID) 9 | assert(filename and type(filename) == 'string', "HDF5File.__init() requires a filename - perhaps you want HDF5File.create()?") 10 | assert(fileID and type(fileID) == 'number', "HDF5File.__init() requires a fileID - perhaps you want HDF5File.create()?") 11 | if fileID < 0 then 12 | error("HDF5File: fileID " .. fileID .. " is not valid") 13 | end 14 | self._filename = filename 15 | self._fileID = fileID 16 | 17 | hdf5._logger.debug("Opening " .. 
tostring(self)) 18 | 19 | self._rootGroup = hdf5._loadObject(self, fileID, "/") 20 | if not self._rootGroup then 21 | error("HDF5FILE: unable to load root group from file") 22 | end 23 | end 24 | 25 | function HDF5File.__write(object, self) 26 | local var = {} 27 | for k,v in pairs(object) do 28 | var[k] = v 29 | end 30 | self:writeObject(var, torch.typename(object), hook) 31 | end 32 | 33 | function HDF5File.__read(object, self, versionNumber) 34 | local var = self:readObject() 35 | for k,v in pairs(var) do 36 | object[k] = v 37 | end 38 | end 39 | 40 | function HDF5File:filename() 41 | return self._filename 42 | end 43 | 44 | function HDF5File:__tostring() 45 | return "[HDF5File: " .. hdf5._describeObject(self._fileID) .. " " .. self:filename() .. "]" 46 | end 47 | 48 | function HDF5File:close() 49 | self._rootGroup:close() 50 | hdf5._logger.debug("Closing " .. tostring(self)) 51 | local status = hdf5.C.H5Fclose(self._fileID) 52 | if not status then 53 | hdf5._logger.error("Error closing " .. tostring(self)) 54 | end 55 | end 56 | 57 | function HDF5File:write(datapath, data, options) 58 | self:_write_or_append("write", datapath, data, options) 59 | end 60 | 61 | function HDF5File:append(datapath, data, options) 62 | self:_write_or_append("append", datapath, data, options) 63 | end 64 | 65 | function HDF5File:_write_or_append(method, datapath, data, options) 66 | if datapath:sub(1,1) == "/" then 67 | datapath = datapath:sub(2) 68 | end 69 | datapath = stringx.split(datapath, "/") -- TODO 70 | assert(datapath and type(datapath) == 'table', "HDF5File:" .. method .. "() requires a table (data path) as its first parameter") 71 | assert(data and type(data) == 'userdata' or type(data) == 'table', "HDF5File:" .. method .. "() requires a tensor or table as its second parameter") 72 | 73 | if #datapath == 0 then 74 | if type(data) == 'table' then 75 | for k, v in pairs(data) do 76 | self._rootGroup[method](self._rootGroup, { k }, v, options) 77 | end 78 | return 79 | else 80 | error("HDF5File:write() - must provide a table when writing to the root location") 81 | end 82 | end 83 | 84 | self._rootGroup[method](self._rootGroup, datapath, data, options) 85 | end 86 | 87 | function HDF5File:read(datapath) 88 | if not datapath then 89 | datapath = "/" 90 | end 91 | hdf5._logger.debug("Reading " .. datapath .. " from " .. tostring(self)) 92 | if datapath:sub(1,1) == "/" then 93 | datapath = datapath:sub(2) 94 | end 95 | datapath = stringx.split(datapath, "/") -- TODO 96 | return self._rootGroup:read(datapath) 97 | end 98 | 99 | function HDF5File:all() 100 | return self:read("/"):all() 101 | end 102 | 103 | --[[ Open or create an HDF5 file. 
104 | 105 | Parameters: 106 | * `filename` - path to file 107 | * `mode` (default `'a'`) - mode of access 108 | 109 | Where `mode` is one of the following strings: 110 | 111 | * `'a'` - Read/write if exists; create otherwise 112 | * `'r'` - Read-only; file must exist 113 | * `'r+'` - Read/write; file must exist 114 | * `'w'` - Create file; overwrite if exists 115 | * `'w-'` - Create file; fail if exists 116 | 117 | Returns: 118 | * A new HDF5File object 119 | 120 | ]] 121 | function hdf5.HDF5File.open(filename, mode) 122 | -- TODO: more control over HDF5 options 123 | -- * compression 124 | -- * chunking 125 | if filename:sub(1,2) == "~/" then 126 | filename = path.abspath(filename:sub(3)) 127 | end 128 | filename = path.abspath(filename) 129 | 130 | local dirname = path.dirname(filename) 131 | if not path.isdir(dirname) then 132 | error("HDF5File.open: no such directory " .. dirname) 133 | end 134 | if mode == nil or mode == 'a' then 135 | if path.exists(filename) then 136 | mode = 'r+' 137 | else 138 | mode = 'w' 139 | end 140 | end 141 | local function createFunc(filename, access) 142 | local fileID = hdf5.C.H5Fcreate(filename, access, hdf5.H5P_DEFAULT, hdf5.H5P_DEFAULT) 143 | return hdf5.HDF5File(filename, fileID) 144 | end 145 | local function openFunc(filename, access) 146 | local fileID = hdf5.C.H5Fopen(filename, access, hdf5.H5P_DEFAULT) 147 | return hdf5.HDF5File(filename, fileID) 148 | end 149 | if mode == 'r' then 150 | return openFunc(filename, hdf5.H5F_ACC_RDONLY) 151 | elseif mode == 'r+' then 152 | return openFunc(filename, hdf5.H5F_ACC_RDWR) 153 | elseif mode == 'w' then 154 | return createFunc(filename, hdf5.H5F_ACC_TRUNC) 155 | elseif mode == 'w-' then 156 | return createFunc(filename, hdf5.H5F_ACC_EXCL) 157 | else 158 | error("Unknown mode '" .. mode .. "' for hdf5.open()") 159 | end 160 | end 161 | 162 | function HDF5File:_printOpenObjects() 163 | local flags = bit.bor(hdf5.H5F_OBJ_ALL, hdf5.H5F_OBJ_LOCAL) 164 | local openCount = tonumber(hdf5.C.H5Fget_obj_count(self._fileID, flags)) 165 | local objInfo = "" 166 | if openCount > 0 then 167 | local objList = hdf5.ffi.new("int[" .. openCount .. "]") 168 | hdf5.C.H5Fget_obj_ids(self._fileID, flags, openCount, objList) 169 | for k = 0, openCount-1 do 170 | objInfo = objInfo .. " * " .. hdf5._describeObject(objList[k]) .. "\n" 171 | end 172 | end 173 | print("File " .. tostring(self) .. " has " .. openCount .. " open objects.\n" .. objInfo) 174 | return openCount 175 | end 176 | -------------------------------------------------------------------------------- /luasrc/group.lua: -------------------------------------------------------------------------------- 1 | local torch = require 'torch' 2 | local stringx = require 'pl.stringx' 3 | local ffi = require 'ffi' 4 | 5 | local HDF5Group = torch.class("hdf5.HDF5Group") 6 | 7 | --[[ Convert from LongStorage containing tensor sizes to an HDF5 hsize_t array ]] 8 | 9 | local function convertSize(size) 10 | local nDims 11 | 12 | if type(size) == 'table' then 13 | nDims = #size 14 | else 15 | nDims = size:size() 16 | end 17 | 18 | local size_t = hdf5.ffi.typeof("hsize_t[" .. nDims .. "]") 19 | local hdf5_size = size_t() 20 | for k = 1, nDims do 21 | hdf5_size[k-1] = size[k] 22 | end 23 | return hdf5_size 24 | end 25 | 26 | --[[ Return a pointer to a NULL hsize_t array ]] 27 | local function nullSize() 28 | local size_t = hdf5.ffi.typeof("hsize_t *") 29 | return size_t() 30 | end 31 | 32 | --[[ Constructor. Users need not call this directly. 
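It wraps an existing HDF5 group identifier and, on construction, iterates the group's links (via H5Literate) to build a wrapper object for each child.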
]] 33 | function HDF5Group:__init(parent, groupID) 34 | assert(parent) 35 | assert(groupID) 36 | self._parent = parent 37 | self._groupID = groupID 38 | 39 | hdf5._logger.debug("Initialising " .. tostring(self)) 40 | 41 | if self._groupID < 0 then 42 | error("Invalid groupID " .. groupID) 43 | end 44 | 45 | local groupInfo = hdf5.ffi.new("H5G_info_t[1]") 46 | local err = hdf5.C.H5Gget_info(self._groupID, groupInfo) 47 | if err < 0 then 48 | error("Failed getting group info") 49 | end 50 | local nChildren = tonumber(groupInfo[0].nlinks) 51 | 52 | -- Create a wrapper object for each child of this group 53 | self._children = {} 54 | local callback = ffi.cast("H5L_iterate_t", 55 | function(baseGroupID, linkName, linkInfo, data) 56 | linkName = hdf5.ffi.string(linkName) 57 | self._children[linkName] = 58 | hdf5._loadObject(self, baseGroupID, linkName) 59 | return 0 60 | end) 61 | hdf5.C.H5Literate( 62 | self._groupID, 63 | hdf5.C.H5_INDEX_NAME, 64 | hdf5.C.H5_ITER_NATIVE, 65 | hdf5.ffi.new("hsize_t *"), 66 | callback, 67 | hdf5.ffi.new("void *") 68 | ) 69 | callback:free() 70 | end 71 | 72 | function HDF5Group.__write(object, self) 73 | local var = {} 74 | for k,v in pairs(object) do 75 | var[k] = v 76 | end 77 | self:writeObject(var, torch.typename(object), hook) 78 | end 79 | 80 | function HDF5Group.__read(object, self, versionNumber) 81 | local var = self:readObject() 82 | for k,v in pairs(var) do 83 | object[k] = v 84 | end 85 | end 86 | 87 | function HDF5Group:__tostring() 88 | return "[HDF5Group " .. self._groupID .. " " .. hdf5._getObjectName(self._groupID) .. "]" 89 | end 90 | 91 | function HDF5Group:_writeDataSet(locationID, name, tensor, options) 92 | hdf5._logger.debug("Writing dataset '" .. name .. "' in " .. tostring(self)) 93 | if not options then 94 | options = hdf5.DataSetOptions() 95 | end 96 | 97 | options:adjustForData(tensor) 98 | 99 | hdf5._logger.debug("Using options: " .. tostring(options)) 100 | local dims = convertSize(tensor:size()) 101 | local maxDims = convertSize(tensor:size()) 102 | 103 | if options._chunking then 104 | maxDims[0] = hdf5.H5F_UNLIMITED -- array is zero indexed 105 | end 106 | 107 | -- (rank, dims, maxdims) 108 | local dataspaceID = hdf5.C.H5Screate_simple(tensor:nDimension(), dims, maxDims); 109 | 110 | local typename = torch.typename(tensor) 111 | local fileDataType = hdf5._outputTypeForTensorType(typename) 112 | if fileDataType == nil then 113 | error("Cannot find hdf5 file type for " .. typename) 114 | end 115 | 116 | local datasetID = hdf5.C.H5Dcreate2( 117 | locationID, 118 | name, 119 | fileDataType, 120 | dataspaceID, 121 | hdf5.H5P_DEFAULT, 122 | options:creationProperties(), 123 | hdf5.H5P_DEFAULT 124 | ); 125 | 126 | 127 | local status = self:_writeTensorToDataSet(datasetID, tensor) 128 | 129 | if status < 0 then 130 | error("Error writing data " .. name .. " to " .. 
tostring(self)) 131 | end 132 | 133 | local dataset = hdf5.HDF5DataSet(self, datasetID) 134 | return dataset 135 | end 136 | 137 | function HDF5Group:_writeTensorToDataSet(datasetID, tensor) 138 | local typename = torch.typename(tensor) 139 | local memoryDataType = hdf5._nativeTypeForTensorType(typename) 140 | local status = hdf5.C.H5Dwrite( 141 | datasetID, 142 | memoryDataType, 143 | hdf5.H5S_ALL, 144 | hdf5.H5S_ALL, 145 | hdf5.H5P_DEFAULT, 146 | tensor:contiguous():data() 147 | ); 148 | return status 149 | end 150 | 151 | -- http://www.hdfgroup.org/ftp/HDF5/current/src/unpacked/examples/h5_extend.c 152 | function HDF5Group:_appendDataSet(locationID, name, tensor, options) 153 | local status 154 | local datasetID = hdf5.C.H5Dopen2( 155 | locationID, 156 | name, 157 | hdf5.H5P_DEFAULT 158 | ); 159 | 160 | local dataset = hdf5.HDF5DataSet(self, datasetID) 161 | local dataspaceID = dataset._dataspaceID 162 | 163 | -- Extend the dataset 164 | local tensorSize = tensor:size():totable() 165 | local originalSize = dataset:dataspaceSize() 166 | local newSize = dataset:dataspaceSize() 167 | newSize[1] = originalSize[1] + tensorSize[1] 168 | 169 | local newSize_h = convertSize(newSize) 170 | 171 | status = hdf5.C.H5Dset_extent(datasetID, newSize_h) 172 | dataspaceID = dataset:_refresh_dataspace() -- http://www.hdfgroup.org/HDF5/doc/RM/RM_H5D.html#Dataset-SetExtent 173 | 174 | if status < 0 then 175 | error("Error extending data " .. name .. " to " .. tostring(self)) 176 | end 177 | 178 | -- build the offset 179 | local offset = {originalSize[1]} 180 | for k = 1, (tensor:nDimension() - 1) do 181 | table.insert(offset, k + 1, 0) 182 | end 183 | 184 | local offset_h = convertSize(offset) 185 | local stride_h = null 186 | local count_h = convertSize(tensorSize) 187 | 188 | -- Select a hyperslab in extended portion of dataset 189 | status = hdf5.C.H5Sselect_hyperslab(dataspaceID, hdf5.H5S_SELECT_SET, offset_h, stride_h, count_h, null); 190 | 191 | if status < 0 then 192 | error("Error selecting hyperslab for data " .. name .. " to " .. tostring(self)) 193 | end 194 | 195 | -- define a new memory space for the extension 196 | -- TODO we may need to close this memspaceID explicitly 197 | local memspaceID = hdf5.C.H5Screate_simple(tensor:nDimension(), convertSize(tensorSize), null); 198 | 199 | -- write the data to the extended portion of the dataset 200 | local typename = torch.typename(tensor) 201 | local memoryDataType = hdf5._nativeTypeForTensorType(typename) 202 | local status = hdf5.C.H5Dwrite( 203 | datasetID, 204 | memoryDataType, 205 | memspaceID, 206 | dataspaceID, 207 | hdf5.H5P_DEFAULT, 208 | tensor:data() 209 | ); 210 | 211 | if status < 0 then 212 | error("Error writing to hyperslab for data " .. name .. " to " .. tostring(self)) 213 | end 214 | 215 | status = hdf5.C.H5Sclose(memspaceID) 216 | if status < 0 then 217 | error("Failed closing memspace when appending for " .. tostring(self)) 218 | end 219 | 220 | return dataset 221 | end 222 | 223 | local function isTensor(data) 224 | return torch.typename(data):sub(-6, -1) == 'Tensor' 225 | end 226 | 227 | function HDF5Group:_writeData(locationID, name, data, options) 228 | 229 | if type(data) == 'table' then 230 | error("_writeData should not be used for tables") 231 | elseif type(data) == 'userdata' then 232 | if isTensor(data) then 233 | return self:_writeDataSet(locationID, name, data, options) 234 | end 235 | error("torch-hdf5: writing non-Tensor userdata is not supported") 236 | end 237 | error("torch-hdf5: writing data of type " .. 
type(data) .. " is not supported") 238 | end 239 | 240 | function HDF5Group:_appendData(locationID, name, data, options) 241 | if type(data) == 'table' then 242 | error("_appendData should not be used for tables") 243 | elseif type(data) == 'userdata' then 244 | if isTensor(data) then 245 | return self:_appendDataSet(locationID, name, data, options) 246 | end 247 | error("torch-hdf5: writing non-Tensor userdata is not supported") 248 | end 249 | error("torch-hdf5: writing data of type " .. type(data) .. " is not supported") 250 | end 251 | 252 | function HDF5Group:createChild(name) 253 | assert(name, "no name given for child") 254 | hdf5._logger.debug("Creating child '" .. name .. "' of " .. tostring(self)) 255 | local childID = hdf5.C.H5Gcreate2(self._groupID, name, hdf5.H5P_DEFAULT, hdf5.H5P_DEFAULT, hdf5.H5P_DEFAULT) 256 | local child = hdf5.HDF5Group(self, childID) 257 | self._children[name] = child 258 | return child 259 | end 260 | 261 | function HDF5Group:getOrCreateChild(name) 262 | local child = self._children[name] 263 | if not child then 264 | child = self:createChild(name) 265 | end 266 | return child 267 | end 268 | 269 | function HDF5Group:write(datapath, data, options) 270 | self:_write_or_append("write", datapath, data, options) 271 | end 272 | 273 | function HDF5Group:append(datapath, data, options) 274 | self:_write_or_append("append", datapath, data, options) 275 | end 276 | 277 | function HDF5Group:_write_or_append(method, datapath, data, options) 278 | assert(datapath and type(datapath) == 'table', "HDF5Group:" .. method .. "() expects table as first parameter") 279 | assert(data, "HDF5Group:" .. method .. "() requires data as parameter") 280 | if #datapath == 0 then 281 | error("HDF5Group: descended too far") 282 | end 283 | local key = datapath[1] 284 | if #datapath > 1 then 285 | local child = self:getOrCreateChild(key) 286 | hdf5._logger.debug("Descending into child '" .. key 287 | .. "' (" .. tostring(child) .. ") of " .. tostring(self)) 288 | for k = 1, #datapath do 289 | datapath[k] = datapath[k+1] 290 | end 291 | return child[method](child, datapath, data, options) 292 | end 293 | 294 | if type(data) == 'table' then 295 | local child = self:getOrCreateChild(key) 296 | for k, v in pairs(data) do 297 | child[method](child, {k}, v, options) 298 | end 299 | return 300 | end 301 | 302 | hdf5._logger.debug(method .. " " .. (torch.typename(data) or type(data)) 303 | .. " as '" .. key .. "' in " .. tostring(self)) 304 | 305 | local child 306 | if method == "write" then 307 | child = self:_writeData(self._groupID, key, data, options) 308 | elseif method == "append" then 309 | child = self:_appendData(self._groupID, key, data, options) 310 | end 311 | if not child then 312 | error("HDF5Group: error writing '" .. key .. "' in " .. tostring(self)) 313 | end 314 | self._children[key] = child 315 | end 316 | 317 | function HDF5Group:read(datapath) 318 | assert(datapath and type(datapath) == 'table', "HDF5Group:read() expects table as first parameter") 319 | hdf5._logger.debug("Reading from " .. tostring(self)) 320 | if not datapath or #datapath == 0 then 321 | return self 322 | end 323 | 324 | local key = datapath[1] 325 | local child = self._children[key] 326 | if not child then 327 | error("HDF5Group:read() - no such child '" .. key .. "' for " .. tostring(self)) 328 | end 329 | if #datapath > 1 then 330 | hdf5._logger.debug("Descending into child '" .. key 331 | .. "' (" .. tostring(child) .. ") of " .. 
tostring(self)) 332 | for k = 1, #datapath do 333 | datapath[k] = datapath[k+1] 334 | end 335 | return child:read(datapath) 336 | end 337 | 338 | hdf5._logger.debug("Reading " .. tostring(child) .. " as '" .. key .. "' in " .. tostring(self)) 339 | return child 340 | end 341 | 342 | function HDF5Group:all() 343 | local table = {} 344 | for k, v in pairs(self._children) do 345 | table[k] = v:all() 346 | end 347 | return table 348 | end 349 | 350 | function HDF5Group:close() 351 | for k, v in pairs(self._children) do 352 | v:close() 353 | end 354 | 355 | hdf5._logger.debug("Closing " .. tostring(self)) 356 | local status = hdf5.C.H5Gclose(self._groupID) 357 | if status < 0 then 358 | error("Error closing " .. tostring(self)) 359 | end 360 | end 361 | -------------------------------------------------------------------------------- /luasrc/init.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | # torch-hdf5 4 | 5 | Torch support for the HDF5 Hierarchical Data Format. 6 | 7 | This format is fast and flexible, and is used by many scientific applications (Matlab, R, Python, etc) 8 | 9 | ]] 10 | local torch = require 'torch' 11 | 12 | hdf5 = {} 13 | 14 | local function log(msg) 15 | local info = debug.getinfo(1, "Sl") 16 | print(table.concat{info.short_src, ":", info.currentline, " ", msg}) 17 | end 18 | hdf5._logger = { 19 | debug = function() end, 20 | warn = log, 21 | error = log, 22 | } 23 | 24 | torch.include("hdf5", "config.lua") 25 | if not hdf5._config then 26 | error("Unable to find torch-hdf5 config.lua") 27 | end 28 | 29 | torch.include("hdf5", "ffi.lua") 30 | torch.include("hdf5", "file.lua") 31 | torch.include("hdf5", "dataset.lua") 32 | torch.include("hdf5", "datasetOptions.lua") 33 | torch.include("hdf5", "group.lua") 34 | torch.include("hdf5", "testUtils.lua") 35 | 36 | hdf5._debugMode = false 37 | --[[ Call this to enable debug mode. ]] 38 | function hdf5.debugMode() 39 | hdf5._debugMode = true 40 | hdf5._logger.debug = log 41 | end 42 | --[[ Return true if we are in debug mode; false otherwise ]] 43 | function hdf5._inDebugMode() 44 | return hdf5._debugMode 45 | end 46 | 47 | --[[ Read an object from a path and wrap it in an instance of the appropriate class 48 | 49 | Parameters: 50 | * `parent` - wrapper object immediately above the object being loaded, in the hierarchy 51 | * `locationID` - an HDF5 ID relative to which we are to load the object 52 | * `datapath` - path to the object to load, relative to the given location 53 | 54 | Returns: An HDF5Group or HDF5DataSet object 55 | 56 | ]] 57 | function hdf5._loadObject(parent, locationID, datapath) 58 | local objectID = hdf5.C.H5Oopen(locationID, datapath, hdf5.H5P_DEFAULT) 59 | if objectID < 0 then 60 | error("Unable to read from '" .. datapath .. "' in " .. tostring(parent) .. " - no such data path.") 61 | end 62 | 63 | local typeID = hdf5.C.H5Iget_type(objectID) 64 | local status = hdf5.C.H5Oclose(objectID) 65 | if status < 0 then 66 | error("hdf5._loadObject: error closing object " .. objectID) 67 | end 68 | 69 | if typeID == hdf5.C.H5I_GROUP then 70 | local groupID = hdf5.C.H5Gopen2(locationID, datapath, hdf5.H5P_DEFAULT) 71 | if groupID < 0 then 72 | error("Unable to read group from '" .. datapath .. "' in " .. tostring(parent) .. 
"!") 73 | end 74 | local group = hdf5.HDF5Group(parent, groupID) 75 | return group 76 | elseif typeID == hdf5.C.H5I_DATASET then 77 | local datasetID = hdf5.C.H5Dopen2(locationID, datapath, hdf5.H5P_DEFAULT); 78 | if datasetID < 0 then 79 | error("Unable to read dataset from '" .. datapath .. "' in " .. tostring(parent) .. "!") 80 | end 81 | local dataspaceID = hdf5.C.H5Dget_space(datasetID) 82 | if dataspaceID < 0 then 83 | error("Unable to get dataspace for dataset '" .. datapath .. "' in " .. tostring(parent) .. "!") 84 | end 85 | local dataset = hdf5.HDF5DataSet(parent, datasetID, dataspaceID) 86 | return dataset 87 | else 88 | error("Unsupported data type at " .. datapath) 89 | end 90 | end 91 | 92 | --[[ Shorthand for [hdf5.HDF5File.open()](#hdf5.HDF5File.open). ]] 93 | function hdf5.open(...) 94 | return hdf5.HDF5File.open(...) 95 | end 96 | 97 | return hdf5 98 | -------------------------------------------------------------------------------- /luasrc/testUtils.lua: -------------------------------------------------------------------------------- 1 | local torch = require 'torch' 2 | local stringx = require 'pl.stringx' 3 | local dir = require 'pl.dir' 4 | 5 | hdf5._testUtils = {} 6 | 7 | function hdf5._testUtils.withTmpDir(func) 8 | local file = io.popen("mktemp -d -t torch_hdf5_XXXXXX") 9 | local tmpDir = stringx.strip(file:read("*all")) 10 | file:close() 11 | func(tmpDir) 12 | dir.rmtree(tmpDir) 13 | end 14 | 15 | function hdf5._testUtils.deepAlmostEq(a, b, epsilon, msg) 16 | local typeA = torch.typename(a) or type(a) 17 | local typeB = torch.typename(b) or type(b) 18 | if typeA ~= typeB then 19 | return false, "type mismatch", a, b 20 | end 21 | if typeA == 'table' then 22 | for k, v in pairs(a) do 23 | if not b[k] then 24 | return false, "mismatching table keys", a, b 25 | end 26 | end 27 | for k, v in pairs(b) do 28 | if not a[k] then 29 | return false, "mismatching table keys", a, b 30 | end 31 | local result, msg, subA, subB = hdf5._testUtils.deepAlmostEq(a[k], v, epsilon, msg) 32 | if not result then 33 | return false, msg, subA, subB 34 | end 35 | end 36 | end 37 | if typeA:sub(-6, -1) == 'Tensor' then 38 | local diff = a:add(-b):apply(function(x) return math.abs(x) end):sum() 39 | if diff > epsilon then 40 | return false, "tensor values differ by " .. diff .. " > " .. 
epsilon, a, b 41 | end 42 | end 43 | 44 | return true 45 | end 46 | -------------------------------------------------------------------------------- /tests/benchmark/benchmark.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | 3 | -- Benchmark writes 4 | 5 | print("Size\t\t", "torch.save\t\t", "hdf5\t") 6 | for n = 1, 27 do 7 | local size = math.pow(2, n) 8 | local data = torch.rand(size) 9 | local t = torch.tic() 10 | torch.save("out.t7", data) 11 | local normalTime = torch.toc(t) 12 | t = torch.tic() 13 | local hdf5file = hdf5.open("out.h5", 'w') 14 | hdf5file["foo"] = data 15 | hdf5file:close() 16 | local hdf5time = torch.toc(t) 17 | print(n, "\t", normalTime,"\t", hdf5time) 18 | end 19 | 20 | 21 | -- Benchmark reads 22 | 23 | -- TODO 24 | -------------------------------------------------------------------------------- /tests/data/empty.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-deepmind/torch-hdf5/b7bc57d5ca396dbd14af108d0c555209aeafb48f/tests/data/empty.h5 -------------------------------------------------------------------------------- /tests/data/empty.lua: -------------------------------------------------------------------------------- 1 | return { 2 | } -------------------------------------------------------------------------------- /tests/data/generate.py: -------------------------------------------------------------------------------- 1 | # This script is used to generate reference HDF5 files. It uses h5py, so that 2 | # we can compare against that implementation. 3 | 4 | import h5py 5 | import argparse 6 | import os 7 | from collections import namedtuple 8 | import numpy as np 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("out") 12 | args = parser.parse_args() 13 | 14 | Case = namedtuple('Case', ['name', 'data']) 15 | testCases = [] 16 | 17 | def addTestCase(name, data): 18 | testCases.append(Case(name, data)) 19 | 20 | class Data(object): 21 | def __init__(self, w, h, x, y): 22 | super(Data, self).__init__() 23 | self.w = w 24 | self.h = h 25 | self.x = x 26 | self.y = y 27 | 28 | def asPython(self, h5, name): 29 | h5.create_dataset(name, (self.w, self.h)) 30 | h5[name][...] 
= np.linspace(self.x, self.y, self.w * self.h).reshape(self.w, self.h) 31 | 32 | def asLua(self): 33 | out = "" 34 | out += "torch.linspace(%s, %s, %s)" % (self.x, self.y, self.w * self.h) 35 | out += ":resize(%s, %s):float()" % (self.w, self.h) 36 | return out 37 | 38 | def luaDefinition(data): 39 | return "return " + luaDefinitionHelper(data, 0) 40 | 41 | def luaDefinitionHelper(data, level): 42 | 43 | text = "" 44 | indent = " " 45 | if isinstance(data, dict): 46 | text = "{\n" 47 | for k, v in data.iteritems(): 48 | text += indent * (level + 1) + k + " = " + luaDefinitionHelper(v, level + 1) + ",\n" 49 | text += indent * level + "}" 50 | else: 51 | text += data.asLua() 52 | return text 53 | 54 | def writeH5(h5, data): 55 | for k, v in data.iteritems(): 56 | if isinstance(v, dict): 57 | group = h5.create_group(k) 58 | writeH5(group, v) 59 | continue 60 | v.asPython(h5, k) 61 | 62 | addTestCase('empty', {}) 63 | addTestCase('oneTensor', { 'data' : Data(10, 10, 0, 100) }) 64 | addTestCase('twoTensors', { 'data1' : Data(10, 10, 0, 100), 'data2' : Data(10, 10, 0, 10) }) 65 | addTestCase('twoTensorsNested', { 'group' : { 'data' : Data(10, 10, 0, 100) } }) 66 | 67 | for case in testCases: 68 | print("=== Generating %s ===" % (case.name,)) 69 | h5file = h5py.File(os.path.join(args.out, case.name + ".h5"), 'w') 70 | writeH5(h5file, case.data) 71 | luaFilePath = os.path.join(args.out, case.name + ".lua") 72 | with open(luaFilePath, 'w') as luaFile: 73 | luaFile.write(luaDefinition(case.data)) 74 | -------------------------------------------------------------------------------- /tests/data/oneTensor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-deepmind/torch-hdf5/b7bc57d5ca396dbd14af108d0c555209aeafb48f/tests/data/oneTensor.h5 -------------------------------------------------------------------------------- /tests/data/oneTensor.lua: -------------------------------------------------------------------------------- 1 | return { 2 | data = torch.linspace(0, 100, 100):resize(10, 10):float(), 3 | } -------------------------------------------------------------------------------- /tests/data/twoTensors.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-deepmind/torch-hdf5/b7bc57d5ca396dbd14af108d0c555209aeafb48f/tests/data/twoTensors.h5 -------------------------------------------------------------------------------- /tests/data/twoTensors.lua: -------------------------------------------------------------------------------- 1 | return { 2 | data1 = torch.linspace(0, 100, 100):resize(10, 10):float(), 3 | data2 = torch.linspace(0, 10, 100):resize(10, 10):float(), 4 | } -------------------------------------------------------------------------------- /tests/data/twoTensorsNested.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-deepmind/torch-hdf5/b7bc57d5ca396dbd14af108d0c555209aeafb48f/tests/data/twoTensorsNested.h5 -------------------------------------------------------------------------------- /tests/data/twoTensorsNested.lua: -------------------------------------------------------------------------------- 1 | return { 2 | group = { 3 | data = torch.linspace(0, 100, 100):resize(10, 10):float(), 4 | }, 5 | } -------------------------------------------------------------------------------- /tests/matlab/testMatlab.m: 
-------------------------------------------------------------------------------- 1 | h5read /Users/daniel.horgan/test.h5 /dset 2 | -------------------------------------------------------------------------------- /tests/python/testPython.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | 3 | f = h5py.File("test.h5", 'r') 4 | dset = f['dset'] 5 | 6 | print(dset[...]) 7 | 8 | 9 | lua = """ 10 | 11 | require 'hdf5' 12 | 13 | hdf5.open("in.h5", 'r') 14 | 15 | 16 | """ 17 | 18 | # TODO 19 | -------------------------------------------------------------------------------- /tests/testChunking.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Test chunking options. 4 | 5 | ]] 6 | require 'hdf5' 7 | 8 | local path = require 'pl.path' 9 | local totem = require 'totem' 10 | local tester = totem.Tester() 11 | local myTests = {} 12 | local testUtils = hdf5._testUtils 13 | 14 | -- Lua 5.2 compatibility 15 | local unpack = unpack or table.unpack 16 | 17 | function myTests:testChunked() 18 | testUtils.withTmpDir(function(tmpDir) 19 | local h5filename = path.join(tmpDir, "foo.h5") 20 | local h5file = hdf5.open(h5filename) 21 | local options = hdf5.DataSetOptions() 22 | options:setChunked(4, 4) 23 | h5file:write("data", torch.Tensor(7, 5), options) 24 | h5file:close() 25 | tester:assert(path.isfile(h5filename), "file should exist") 26 | end) 27 | end 28 | 29 | function myTests:testChunkedBadSize() 30 | testUtils.withTmpDir(function(tmpDir) 31 | local h5filename = path.join(tmpDir, "foo.h5") 32 | local h5file = hdf5.open(h5filename) 33 | local options = hdf5.DataSetOptions() 34 | options:setChunked(4, 4, 4) 35 | tester:assertError(function() h5file:write("data", torch.Tensor(7, 5), options) end, "should error with dimension mismatch") 36 | h5file:close() 37 | end) 38 | end 39 | 40 | function myTests:testChunkedTooSmall() 41 | testUtils.withTmpDir(function(tmpDir) 42 | local h5filename = path.join(tmpDir, "foo.h5") 43 | local h5file = hdf5.open(h5filename) 44 | local options = hdf5.DataSetOptions() 45 | options:setChunked(4, 4) 46 | h5file:write("data", torch.Tensor(2, 2), options) 47 | h5file:close() 48 | tester:assert(path.isfile(h5filename), "file should exist") 49 | end) 50 | end 51 | 52 | function myTests:testReadPartial() 53 | testUtils.withTmpDir(function(tmpDir) 54 | local h5filename = path.join(tmpDir, "foo.h5") 55 | local h5file = hdf5.open(h5filename) 56 | local options = hdf5.DataSetOptions() 57 | options:setChunked(4, 4) 58 | local data = torch.zeros(13, 13) 59 | local k = 0 60 | data:apply(function(x) 61 | k = k + 1 62 | return k 63 | end) 64 | h5file:write("data", data, options) 65 | h5file:close() 66 | tester:assert(path.isfile(h5filename), "file should exist") 67 | local h5readFile = hdf5.open(h5filename, 'r') 68 | do 69 | local selection = { 3, { 1, 4 } } 70 | local read = h5readFile:read("data"):partial(unpack(selection)) 71 | tester:assertTensorEq(read:resize(4), data[selection], 1e-16, "Partial read returned wrong data") 72 | end 73 | do 74 | local selection = { {1, 13}, { 1, 13 } } 75 | local read = h5readFile:read("data"):partial(unpack(selection)) 76 | tester:assertTensorEq(read, data[selection], 1e-16, "Partial read returned wrong data") 77 | end 78 | do 79 | local selection = { {12, 13}, { 1, 6 } } 80 | local read = h5readFile:read("data"):partial(unpack(selection)) 81 | tester:assertTensorEq(read, data[selection], 1e-16, "Partial read returned wrong data") 82 | end 83 | do 84 | 
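-- a pair of plain scalar indices selects a single element; it is returned as a 1x1 tensor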
local selection = { 13, 13 } 85 | local read = h5readFile:read("data"):partial(unpack(selection)) 86 | tester:assertTensorEq(read, torch.Tensor{{data[selection]}}, 1e-16, "Partial read returned wrong data") 87 | end 88 | do 89 | local selection = { 13, 13, 1 } 90 | tester:assertError(function() 91 | h5readFile:read("data"):partial(unpack(selection)) 92 | end, "should error on bad selection") 93 | end 94 | do 95 | local selection = { 1 } 96 | tester:assertError(function() 97 | h5readFile:read("data"):partial(unpack(selection)) 98 | end, "should error on bad selection") 99 | end 100 | end) 101 | end 102 | 103 | function myTests:testChunkedAppend() 104 | testUtils.withTmpDir(function(tmpDir) 105 | local h5filename = path.join(tmpDir, "foo.h5") 106 | local h5file = hdf5.open(h5filename) 107 | local options = hdf5.DataSetOptions() 108 | options:setChunked(4, 4) 109 | 110 | local data = torch.zeros(13, 13) 111 | local k = 0 112 | data:apply(function(x) 113 | k = k + 1 114 | return k 115 | end) 116 | 117 | -- write the initial data 118 | h5file:write("data", data, options) 119 | h5file:close() 120 | tester:assert(path.isfile(h5filename), "file should exist") 121 | 122 | -- reopen and verify that the original data is present 123 | local h5readFile = hdf5.open(h5filename) 124 | do 125 | local selection = { 3, { 1, 4 } } 126 | local read = h5readFile:read("data"):partial(unpack(selection)) 127 | tester:assertTensorEq(read:resize(4), data[selection], 1e-16, "Partial read returned wrong data") 128 | end 129 | h5readFile:close() 130 | 131 | -- reopen and verify that the original data is present 132 | local h5rwFile = hdf5.open(h5filename, 'r+') 133 | 134 | local appendData = torch.zeros(13, 13) 135 | local k = 13 * 13 136 | appendData:apply(function(x) 137 | k = k + 1 138 | return k 139 | end) 140 | 141 | -- append data to the file 142 | h5rwFile:append("data", appendData, options) 143 | 144 | local function ensureBothOldAndNewData(file) 145 | do --- ensure old data 146 | local selection = { 3, { 1, 4 } } 147 | local read = file:read("data"):partial(unpack(selection)) 148 | tester:assertTensorEq(read:resize(4), data[selection], 1e-16, "Partial read returned wrong data") 149 | end 150 | 151 | do --- ensure new data 152 | local selection = { 16, { 1, 4 } } 153 | local read = file:read("data"):partial(unpack(selection)) 154 | tester:assertTensorEq(read:resize(4), appendData[{3, {1, 4}}], 1e-16, "Partial read returned wrong data") 155 | end 156 | end 157 | 158 | -- make sure our old and new data is present 159 | ensureBothOldAndNewData(h5rwFile) 160 | h5rwFile:close() 161 | 162 | -- reopen to make sure it's still there 163 | local h5readFile2 = hdf5.open(h5filename) 164 | ensureBothOldAndNewData(h5readFile2) 165 | h5readFile2:close() 166 | end) 167 | end 168 | 169 | 170 | return tester:add(myTests):run() 171 | -------------------------------------------------------------------------------- /tests/testData.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Tests for correctness of data writing & reading. 
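Each test writes a tensor of a given element type to a temporary HDF5 file, reads it
back, and checks that both the element type and the values survive the round trip.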
4 | 5 | ]] 6 | 7 | require 'hdf5' 8 | local dir = require 'pl.dir' 9 | local path = require 'pl.path' 10 | local stringx = require 'pl.stringx' 11 | local totem = require 'totem' 12 | local tester = totem.Tester() 13 | local myTests = {} 14 | local testUtils = hdf5._testUtils 15 | 16 | local function writeAndReread(data) 17 | local got 18 | local typeIn = torch.typename(data) 19 | testUtils.withTmpDir(function(tmpDir) 20 | local filename = path.join(tmpDir, "test.h5") 21 | local writeFile = hdf5.open(filename, 'w') 22 | tester:assertne(writeFile, nil, "hdf5.open returned nil") 23 | writeFile:write('data', data) 24 | writeFile:close() 25 | local readFile = hdf5.open(filename, 'r') 26 | tester:assertne(readFile, nil, "hdf5.open returned nil") 27 | local dataset = readFile:read('data') 28 | tester:assertne(dataset, nil, "dataset is nil") 29 | got = dataset:all() 30 | readFile:close() 31 | tester:assertne(got, nil, "hdf5.read returned nil") 32 | local typeOut = torch.typename(got) 33 | tester:asserteq(typeIn, typeOut, "type read not the same as type written: was " .. typeIn .. "; is " .. typeOut) 34 | end) 35 | return got 36 | end 37 | 38 | local function intTensorEqual(typename, a, b) 39 | if torch.typename(a) ~= typename or torch.typename(b) ~= typename then 40 | error("Expected two tensors of type " .. typename .. "; got " .. torch.typename(a) .. ", " .. torch.typename(b)) 41 | end 42 | return a:add(-b):apply(function(x) return math.abs(tonumber(x)) end):sum() == 0 43 | end 44 | 45 | --[[ Not supported yet 46 | function myTests:testCharTensor() 47 | local k = 0 48 | local testData = torch.CharTensor(4, 6):apply(function() k = k + 1; return k end) 49 | local got = writeAndReread(testData) 50 | tester:assert(intTensorEqual("torch.CharTensor", got, testData), "Data read does not match data written!") 51 | end 52 | ]] 53 | 54 | function myTests:testByteTensor() 55 | local k = 0 56 | local testData = torch.ByteTensor(4, 6):apply(function() k = k + 1; return k end) 57 | local got = writeAndReread(testData) 58 | tester:assert(intTensorEqual("torch.ByteTensor", got, testData), "Data read does not match data written!") 59 | end 60 | 61 | function myTests:testIntTensor() 62 | local k = 0 63 | local testData = torch.IntTensor(4, 6):apply(function() k = k + 1; return k end) 64 | local got = writeAndReread(testData) 65 | tester:assert(intTensorEqual("torch.IntTensor", got, testData), "Data read does not match data written!") 66 | end 67 | 68 | function myTests:testShortTensor() 69 | local k = 0 70 | local testData = torch.ShortTensor(4, 6):apply(function() k = k + 1; return k end) 71 | local got = writeAndReread(testData) 72 | tester:assert(intTensorEqual("torch.ShortTensor", got, testData), "Data read does not match data written!") 73 | end 74 | 75 | function myTests:testLongTensor() 76 | local k = 0 77 | local testData = torch.LongTensor(4, 6):apply(function() k = k + 1; return k end) 78 | local got = writeAndReread(testData) 79 | tester:assert(intTensorEqual("torch.LongTensor", got, testData), "Data read does not match data written!") 80 | end 81 | 82 | function myTests:testFloatTensor() 83 | local k = 0 84 | local testData = torch.FloatTensor(4, 6):apply(function() k = k + math.pi; return k end) 85 | testData:div(7) 86 | local got = writeAndReread(testData) 87 | tester:assertTensorEq(got, testData, 1e-32, "Data read does not match data written!") 88 | end 89 | 90 | function myTests:testDoubleTensor() 91 | local k = 0 92 | local testData = torch.DoubleTensor(4, 6):apply(function() k = k + math.pi; 
return k end) 93 | testData:div(7) 94 | local got = writeAndReread(testData) 95 | tester:assertTensorEq(got, testData, 1e-32, "Data read does not match data written!") 96 | end 97 | 98 | function myTests:testNonContiguous() 99 | local k = 0 100 | local testData = torch.DoubleTensor(4, 6):apply(function() k = k + math.pi; return k end) 101 | testData:div(7) 102 | testData = testData:t() 103 | local got = writeAndReread(testData) 104 | tester:assertTensorEq(got, testData, 1e-32, "Data read does not match data written!") 105 | end 106 | 107 | return tester:add(myTests):run() 108 | -------------------------------------------------------------------------------- /tests/testDeflate.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Test with DEFLATE compression filter. 4 | 5 | ]] 6 | require 'hdf5' 7 | local path = require 'pl.path' 8 | local totem = require 'totem' 9 | local tester = totem.Tester() 10 | local myTests = {} 11 | local testUtils = hdf5._testUtils 12 | 13 | function getFileSize(filePath) 14 | local file = io.open(filePath, 'r') 15 | local size = file:seek("end") 16 | file:close() 17 | return size 18 | end 19 | 20 | function myTests:testDeflate() 21 | testUtils.withTmpDir(function(tmpDir) 22 | local h5filename = path.join(tmpDir, "foo.h5") 23 | local h5file = hdf5.open(h5filename) 24 | local options = hdf5.DataSetOptions() 25 | options:setChunked(128, 128) 26 | options:setDeflate() 27 | h5file:write("data", torch.zeros(2000, 2000), options) 28 | h5file:close() 29 | tester:assert(path.isfile(h5filename), "file should exist") 30 | local size = getFileSize(h5filename) 31 | tester:assertlt(size, 100000, "writing zero tensor with deflate should produce a small file") 32 | end) 33 | end 34 | 35 | return tester:add(myTests):run() 36 | -------------------------------------------------------------------------------- /tests/testReference.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | local dir = require 'pl.dir' 3 | local pretty = require 'pl.pretty' 4 | local path = require 'pl.path' 5 | local stringx = require 'pl.stringx' 6 | local myTests = {} 7 | local totem = require 'totem' 8 | local tester = totem.Tester() 9 | local dataDir = path.join(path.dirname(debug.getinfo(1).source:sub(2)), "data") 10 | 11 | local testUtils = hdf5._testUtils 12 | 13 | local function eachReferencePair(func) 14 | for _, filename in ipairs(dir.getfiles(dataDir, "*.h5")) do 15 | local basename = path.basename(filename) 16 | local dirname = path.dirname(filename) 17 | local name, ext = path.splitext(basename) 18 | local luaFile = path.join(dirname, name .. ".lua") 19 | if not path.isfile(filename) or not path.isfile(luaFile) then 20 | error("Invalid reference data pair: " .. tostring(filename) .. ", " .. tostring(luaFile)) 21 | end 22 | func(filename, luaFile) 23 | end 24 | end 25 | 26 | local function loadLuaData(luaFile) 27 | local luaFunc = loadfile(luaFile) 28 | if not luaFunc then 29 | error("Could not load lua file " .. luaFile) 30 | end 31 | local data = luaFunc() 32 | if not data then 33 | error("Got no data from lua file" .. luaFile) 34 | end 35 | return data 36 | end 37 | 38 | --[[ Write data to an HDF5 file and use the h5diff tool to compare the result 39 | against a reference HDF5 file. 
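The reference .h5/.lua pairs under tests/data are generated by tests/data/generate.py
using h5py, so this doubles as an interoperability check against that implementation.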
]] 40 | function myTests.testAgainstReferenceWrite() 41 | eachReferencePair(function(h5file, luaFile) 42 | testUtils.withTmpDir(function(tmpDir) 43 | local data = loadLuaData(luaFile) 44 | local outPath = path.join(tmpDir, "test.h5") 45 | local outFile = hdf5.open(outPath, 'w') 46 | outFile:write("/", data) 47 | outFile:close() 48 | 49 | local diff = "h5diff -c " 50 | local process = io.popen(diff .. " " .. outPath .. " " .. h5file) 51 | local output = process:read("*all") 52 | process:close() 53 | 54 | 55 | local match = stringx.strip(output) == "" 56 | if not match then 57 | print("Expected\n========") 58 | os.execute("h5dump " .. h5file) 59 | print("Got\n===") 60 | os.execute("h5dump " .. outPath) 61 | print("h5diff output:\n" .. output) 62 | end 63 | tester:assert(match, "Mismatch for test case " .. h5file .. " / " .. luaFile) 64 | end) 65 | end) 66 | end 67 | 68 | --[[ Read data from a reference HDF5 file and compare it against a reference 69 | copy of the data ]] 70 | function myTests.testAgainstReferenceRead() 71 | eachReferencePair(function(h5file, luaFile) 72 | local data = loadLuaData(luaFile) 73 | local referenceFile = hdf5.open(h5file, 'r') 74 | local referenceData = referenceFile:all() 75 | local result, msg, a, b = testUtils.deepAlmostEq(referenceData, data, 1e-16) 76 | tester:assert(result, "data read is not the same as data written: " .. tostring(msg) .. " in " 77 | .. pretty.write(a) .. " (GOT)\n-- VS --\n" 78 | .. pretty.write(b) .. " (EXPECTED)\n") 79 | end) 80 | end 81 | 82 | return tester:add(myTests):run() 83 | -------------------------------------------------------------------------------- /tests/testSerialization.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Test torch serialization. 
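An open HDF5File handle is serialized to a torch.MemoryFile with writeObject(),
deserialized with readObject(), and the data written through the original handle is
then read back through the deserialized one.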
4 | 5 | ]] 6 | require 'hdf5' 7 | 8 | local totem = require 'totem' 9 | local tester = totem.Tester() 10 | local myTests = {} 11 | local testUtils = hdf5._testUtils 12 | 13 | -- Lua 5.2 compatibility 14 | local unpack = unpack or table.unpack 15 | 16 | function myTests:testSerialization() 17 | testUtils.withTmpDir(function(tmpDir) 18 | local h5filename = path.join(tmpDir, "foo.h5") 19 | local h5file = hdf5.open(h5filename) 20 | local data = torch.zeros(7, 5) 21 | h5file:write("data", data) 22 | local memfile = torch.MemoryFile() 23 | memfile:binary() 24 | memfile:writeObject(h5file) 25 | local storage = memfile:storage() 26 | memfile:close() 27 | 28 | local stofile = torch.MemoryFile(storage) 29 | stofile:binary() 30 | local memh5file = stofile:readObject() 31 | stofile:close() 32 | local memdata = memh5file:read("data"):all() 33 | 34 | memh5file:close() 35 | 36 | tester:assert(data:eq(memdata):sum() == 7*5) 37 | end) 38 | end 39 | 40 | return tester:add(myTests):run() 41 | -------------------------------------------------------------------------------- /tests/testStructure.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | local pretty = require 'pl.pretty' 3 | local path = require 'pl.path' 4 | local totem = require 'totem' 5 | 6 | local myTests = {} 7 | local tester = totem.Tester() 8 | 9 | local testUtils = hdf5._testUtils 10 | local function writeAndReread(location, data) 11 | local got 12 | local typeIn = type(data) 13 | if typeIn == 'userdata' then 14 | typeIn = torch.typename(data) or typeIn 15 | end 16 | testUtils.withTmpDir(function(tmpDir) 17 | local filename = path.join(tmpDir, "test.h5") 18 | local writeFile = hdf5.open(filename, 'w') 19 | tester:assertne(writeFile, nil, "hdf5.open returned nil") 20 | writeFile:write(location, data) 21 | writeFile:close() 22 | local readFile = hdf5.open(filename, 'r') 23 | tester:assertne(readFile, nil, "hdf5.open returned nil") 24 | local data = readFile:read(location) 25 | got = data:all() 26 | readFile:close() 27 | tester:assertne(got, nil, "hdf5.read returned nil") 28 | local typeOut = torch.typename(got) or type(got) 29 | tester:asserteq(typeIn, typeOut, "type read not the same as type written: was " .. typeIn .. "; is " .. typeOut) 30 | end) 31 | return got 32 | end 33 | 34 | local function deepAlmostEq(a, b, epsilon, msg) 35 | local typeA = torch.typename(a) or type(a) 36 | local typeB = torch.typename(b) or type(b) 37 | if typeA ~= typeB then 38 | return false, "type mismatch", a, b 39 | end 40 | if typeA == 'table' then 41 | for k, v in pairs(a) do 42 | if not b[k] then 43 | return false, "mismatching table keys", a, b 44 | end 45 | end 46 | for k, v in pairs(b) do 47 | if not a[k] then 48 | return false, "mismatching table keys", a, b 49 | end 50 | local result, msg, subA, subB = deepAlmostEq(a[k], v, epsilon, msg) 51 | if not result then 52 | return false, msg, subA, subB 53 | end 54 | end 55 | end 56 | if typeA:sub(-6, -1) == 'Tensor' then 57 | local diff = a:add(-b):apply(function(x) return math.abs(x) end):sum() 58 | if diff > epsilon then 59 | return false, "tensor values differ by " .. diff .. " > " .. epsilon, a, b 60 | end 61 | end 62 | 63 | return true 64 | end 65 | 66 | local function writeAndRereadTest(dataPath, testData) 67 | local got = writeAndReread(dataPath, testData) 68 | local result, msg, a, b = deepAlmostEq(got, testData, 1e-16) 69 | tester:assert(result, "data read is not the same as data written: " .. tostring(msg) .. " in " 70 | .. pretty.write(a) .. 
" (GOT)\n-- VS --\n" 71 | .. pretty.write(b) .. " (EXPECTED)\n") 72 | end 73 | 74 | function myTests:testWriteTableRoot() 75 | local testData = { data = torch.rand(4, 6) } 76 | local dataPath = "/" 77 | writeAndRereadTest(dataPath, testData) 78 | end 79 | 80 | function myTests:testWriteTableNonRoot() 81 | local testData = { data = torch.rand(4, 6) } 82 | local dataPath = "/group" 83 | writeAndRereadTest(dataPath, testData) 84 | end 85 | 86 | function myTests:testWriteTensorRoot() 87 | local testData = torch.rand(4, 6) 88 | local dataPath = "/data" 89 | writeAndRereadTest(dataPath, testData) 90 | end 91 | 92 | function myTests:testWriteTensorNonRoot() 93 | local testData = torch.rand(4, 6) 94 | local dataPath = "/group/data" 95 | writeAndRereadTest(dataPath, testData) 96 | end 97 | 98 | function myTests:testWriteNestedTableRoot() 99 | local testData = { group = { data = torch.rand(4, 6) } } 100 | local dataPath = "/" 101 | writeAndRereadTest(dataPath, testData) 102 | end 103 | 104 | function myTests:testWriteNestedTableNonRoot() 105 | local testData = { group2 = { data = torch.rand(4, 6) } } 106 | local dataPath = "/group1" 107 | writeAndRereadTest(dataPath, testData) 108 | end 109 | 110 | function myTests:testWriteNestedTableDeepPath() 111 | local testData = { group4 = { group5 = { data = torch.rand(4, 6) } } } 112 | local dataPath = "/group1/group2/group3" 113 | writeAndRereadTest(dataPath, testData) 114 | end 115 | 116 | return tester:add(myTests):run() 117 | --------------------------------------------------------------------------------